From d7040813712a7c7474fa77b5f61c4966b50f3173 Mon Sep 17 00:00:00 2001 From: Jiashen Cao Date: Wed, 16 Aug 2023 16:50:59 -0400 Subject: [PATCH 1/5] add USE --- evadb/parser/evadb.lark | 14 +++++- evadb/parser/lark_visitor/__init__.py | 2 + evadb/parser/lark_visitor/_use_statement.py | 28 ++++++++++++ evadb/parser/types.py | 1 + evadb/parser/use_statement.py | 50 +++++++++++++++++++++ test/parser/test_parser.py | 26 +++++++++++ 6 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 evadb/parser/lark_visitor/_use_statement.py create mode 100644 evadb/parser/use_statement.py diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index f041016b73..bf34ff77b0 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -2,7 +2,7 @@ start: (sql_statement? ";")+ -sql_statement: ddl_statement | dml_statement | utility_statement +sql_statement: ddl_statement | dml_statement | utility_statement | context_statement ddl_statement: create_database | create_table | create_index | create_udf | drop_database | drop_table | drop_udf | drop_index | rename_table @@ -12,6 +12,8 @@ dml_statement: select_statement | insert_statement | update_statement utility_statement: describe_statement | show_statement | help_statement | explain_statement +context_statement: use_statement + // Data Definition Language // Create statements @@ -172,11 +174,19 @@ explain_statement: EXPLAIN explainable_statement explainable_statement : select_statement | insert_statement | update_statement | delete_statement | create_table +// Context Statements + +use_statement: USE database_name "(" query_string ")" // One shortcoming that query string cannot have parenthesis + // Common Clauses // DB Objects +query_string: QUERY_STRING + full_id: uid dotted_id? + +database_name: full_id table_name: full_id @@ -360,6 +370,7 @@ PARAMETERS: "PARAMETERS"i PRIMARY: "PRIMARY"i REFERENCES: "REFERENCES"i RENAME: "RENAME"i +USE: "USE"i SAMPLE: "SAMPLE"i IFRAMES: "IFRAMES"i AUDIORATE: "AUDIORATE"i @@ -556,6 +567,7 @@ ID_LITERAL: /[A-Za-z_$0-9]*?[A-Za-z_$]+?[A-Za-z_$0-9]*/ DQUOTA_STRING: /"[^";]*"/ SQUOTA_STRING: /'[^';]*'/ BQUOTA_STRING: /`[^'`]*`/ +QUERY_STRING: /[^();]+/ DEC_DIGIT: /[0-9]/ // LARK diff --git a/evadb/parser/lark_visitor/__init__.py b/evadb/parser/lark_visitor/__init__.py index c27cc6a6a1..220f2a6d50 100644 --- a/evadb/parser/lark_visitor/__init__.py +++ b/evadb/parser/lark_visitor/__init__.py @@ -29,6 +29,7 @@ from evadb.parser.lark_visitor._select_statement import Select from evadb.parser.lark_visitor._show_statements import Show from evadb.parser.lark_visitor._table_sources import TableSources +from evadb.parser.lark_visitor._use_statement import Use # To add new functionality to the parser, create a new file under # the lark_visitor directory, and implement a new class which @@ -70,6 +71,7 @@ class LarkInterpreter( Show, Explain, Delete, + Use, ): def __init__(self, query): super().__init__() diff --git a/evadb/parser/lark_visitor/_use_statement.py b/evadb/parser/lark_visitor/_use_statement.py new file mode 100644 index 0000000000..756349c644 --- /dev/null +++ b/evadb/parser/lark_visitor/_use_statement.py @@ -0,0 +1,28 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from lark import Tree + +from evadb.parser.use_statement import UseStatement + + +class Use: + def use_statement(self, tree): + for child in tree.children: + if isinstance(child, Tree): + if child.data == "database_name": + database_name = self.visit(child) + if child.data == "query_string": + query_string = self.visit(child) + return UseStatement(database_name, query_string) \ No newline at end of file diff --git a/evadb/parser/types.py b/evadb/parser/types.py index 406322c9f6..18b770fa8b 100644 --- a/evadb/parser/types.py +++ b/evadb/parser/types.py @@ -40,6 +40,7 @@ class StatementType(EvaDBEnum): EXPLAIN # noqa: F821 CREATE_INDEX # noqa: F821 CREATE_DATABASE # noqa: F821 + USE # noqa: F821 # add other types diff --git a/evadb/parser/use_statement.py b/evadb/parser/use_statement.py new file mode 100644 index 0000000000..4945925913 --- /dev/null +++ b/evadb/parser/use_statement.py @@ -0,0 +1,50 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +from evadb.parser.statement import AbstractStatement +from evadb.parser.types import StatementType + + +class UseStatement(AbstractStatement): + def __init__(self, database_name: str, query_string: str): + super().__init__(StatementType.USE) + self._database_name = database_name + self._query_string = query_string + + @property + def database_name(self): + return self._database_name + + @property + def query_string(self): + return self._query_string + + def __str__(self): + return f"USE {self.database_name} ({self.query_string})" + + def __eq__(self, other: object) -> bool: + if not isinstance(other, UseStatement): + return False + return self.database_name == other.database_name and self.query_string == other.query_string + + def __hash__(self) -> int: + return hash( + ( + super().__hash__(), + self.database_name, + self.query_string, + ) + ) diff --git a/test/parser/test_parser.py b/test/parser/test_parser.py index 8a8e7f07c4..2e5386aa8b 100644 --- a/test/parser/test_parser.py +++ b/test/parser/test_parser.py @@ -36,6 +36,7 @@ from evadb.parser.parser import Parser from evadb.parser.rename_statement import RenameTableStatement from evadb.parser.select_statement import SelectStatement +from evadb.parser.use_statement import UseStatement from evadb.parser.statement import AbstractStatement, StatementType from evadb.parser.table_ref import JoinNode, TableInfo, TableRef, TableValuedExpression from evadb.parser.types import ( @@ -50,6 +51,31 @@ class ParserTests(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + def test_use_statement(self): + parser = Parser() + + query_list = [ + "SELECT * FROM DemoTable", + """SELECT * FROM DemoTable WHERE col == "xxx" + """, + """SELECT * FROM DemoTable WHERE col == 'xxx' + """, + ] + + for query in query_list: + use_query = f"USE DemoDB ({query});" + evadb_stmt_list = parser.parse(use_query) + + # check stmt itself + self.assertIsInstance(evadb_stmt_list, list) + self.assertEqual(len(evadb_stmt_list), 1) + self.assertEqual(evadb_stmt_list[0].stmt_type, StatementType.USE) + + expected_stmt = UseStatement("DemoDB", query) + actual_stmt = evadb_stmt_list[0] + print(actual_stmt.query_string, expected_stmt.query_string) + self.assertEqual(actual_stmt, expected_stmt) + def test_create_index_statement(self): parser = Parser() From 1e23bd94f0f5048825425c78b18925cf6ac1ce17 Mon Sep 17 00:00:00 2001 From: Jiashen Cao Date: Wed, 16 Aug 2023 22:24:59 -0400 Subject: [PATCH 2/5] a full pass of USE statement --- evadb/executor/plan_executor.py | 3 ++ evadb/executor/sqlalchemy_executor.py | 36 ++++++++++++++ evadb/optimizer/operators.py | 35 +++++++++++++ evadb/optimizer/rules/rules.py | 19 +++++++ evadb/optimizer/rules/rules_base.py | 2 + evadb/optimizer/rules/rules_manager.py | 2 + evadb/optimizer/statement_to_opr_converter.py | 11 +++++ evadb/plan_nodes/native_plan.py | 49 +++++++++++++++++++ evadb/plan_nodes/types.py | 2 + .../integration_tests/test_native_executor.py | 41 ++++++++++++++++ 10 files changed, 200 insertions(+) create mode 100644 evadb/executor/sqlalchemy_executor.py create mode 100644 evadb/plan_nodes/native_plan.py create mode 100644 test/integration_tests/test_native_executor.py diff --git a/evadb/executor/plan_executor.py b/evadb/executor/plan_executor.py index 8ab6460540..ad188276a4 100644 --- a/evadb/executor/plan_executor.py +++ b/evadb/executor/plan_executor.py @@ -46,6 +46,7 @@ from evadb.executor.storage_executor import StorageExecutor from evadb.executor.union_executor import UnionExecutor from evadb.executor.vector_index_scan_executor import VectorIndexScanExecutor +from evadb.executor.sqlalchemy_executor import SQLAlchemyExecutor from evadb.models.storage.batch import Batch from evadb.parser.create_statement import CreateDatabaseStatement from evadb.parser.statement import AbstractStatement @@ -152,6 +153,8 @@ def _build_execution_tree( executor_node = VectorIndexScanExecutor(db=self._db, node=plan) elif plan_opr_type == PlanOprType.DELETE: executor_node = DeleteExecutor(db=self._db, node=plan) + elif plan_opr_type == PlanOprType.SQLALCHEMY: + executor_node = SQLAlchemyExecutor(db=self._db, node=plan) # EXPLAIN does not need to build execution tree for its children if plan_opr_type != PlanOprType.EXPLAIN: diff --git a/evadb/executor/sqlalchemy_executor.py b/evadb/executor/sqlalchemy_executor.py new file mode 100644 index 0000000000..b5a3c1b8e4 --- /dev/null +++ b/evadb/executor/sqlalchemy_executor.py @@ -0,0 +1,36 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Iterator + +from evadb.database import EvaDBDatabase +from evadb.executor.abstract_executor import AbstractExecutor +from evadb.models.storage.batch import Batch +from evadb.plan_nodes.native_plan import SQLAlchemyPlan + + +class SQLAlchemyExecutor(AbstractExecutor): + """ + Execute SQL through SQLAlchemy engine. + """ + + def __init__(self, db: EvaDBDatabase, node: SQLAlchemyPlan): + super().__init__(db, node) + self._database_name = node.database_name + self._query_string = node.query_string + + def exec(self, *args, **kwargs) -> Iterator[Batch]: + print(self._query_string) + import pandas as pd + yield Batch(pd.DataFrame({"status": ["Ok"]})) diff --git a/evadb/optimizer/operators.py b/evadb/optimizer/operators.py index cdc257a1fb..ef175ad313 100644 --- a/evadb/optimizer/operators.py +++ b/evadb/optimizer/operators.py @@ -62,6 +62,7 @@ class OperatorType(IntEnum): LOGICAL_APPLY_AND_MERGE = auto() LOGICAL_EXTRACT_OBJECT = auto() LOGICAL_VECTOR_INDEX_SCAN = auto() + LOGICAL_USE = auto() LOGICALDELIMITER = auto() @@ -1239,3 +1240,37 @@ def __hash__(self) -> int: self.search_query_expr, ) ) + + +class LogicalUse(Operator): + def __init__(self, database_name: str, query_string: str, children: List = None): + super().__init__(OperatorType.LOGICAL_USE, children) + self._database_name = database_name + self._query_string = query_string + + @property + def database_name(self): + return self._database_name + + @property + def query_string(self): + return self._query_string + + def __eq__(self, other): + is_subtree_equal = super().__eq__(other) + if not isinstance(other, LogicalUse): + return False + return ( + is_subtree_equal + and self.database_name == other.database_name + and self.query_string == other.query_string + ) + + def __hash__(self) -> int: + return hash( + ( + super().__hash__(), + self.database_name, + self.query_string, + ) + ) diff --git a/evadb/optimizer/rules/rules.py b/evadb/optimizer/rules/rules.py index 3c5f39c3a6..a2325f2f04 100644 --- a/evadb/optimizer/rules/rules.py +++ b/evadb/optimizer/rules/rules.py @@ -47,6 +47,7 @@ from evadb.plan_nodes.predicate_plan import PredicatePlan from evadb.plan_nodes.project_plan import ProjectPlan from evadb.plan_nodes.show_info_plan import ShowInfoPlan +from evadb.plan_nodes.native_plan import SQLAlchemyPlan if TYPE_CHECKING: from evadb.optimizer.optimizer_context import OptimizerContext @@ -78,6 +79,7 @@ LogicalShow, LogicalUnion, LogicalVectorIndexScan, + LogicalUse, Operator, OperatorType, ) @@ -99,6 +101,7 @@ from evadb.plan_nodes.storage_plan import StoragePlan from evadb.plan_nodes.union_plan import UnionPlan from evadb.plan_nodes.vector_index_scan_plan import VectorIndexScanPlan +from evadb.plan_nodes.native_plan import SQLAlchemyPlan ############################################## # REWRITE RULES START @@ -1331,5 +1334,21 @@ def apply(self, before: LogicalProject, context: OptimizerContext): yield exchange_plan +class LogicalUseToPhysical(Rule): + def __init__(self): + pattern = Pattern(OperatorType.LOGICAL_USE) + super().__init__(RuleType.LOGICAL_USE_TO_PHYSICAL, pattern) + + def promise(self): + return Promise.LOGICAL_USE_TO_PHYSICAL + + def check(self, before: LogicalUse, context: OptimizerContext): + return True + + def apply(self, before: LogicalUse, context: OptimizerContext): + # TODO: convert to different physical plan based on USE operator + yield SQLAlchemyPlan(before.database_name, before.query_string) + + # IMPLEMENTATION RULES END ############################################## diff --git a/evadb/optimizer/rules/rules_base.py b/evadb/optimizer/rules/rules_base.py index c2a2907bca..00fb0de9ad 100644 --- a/evadb/optimizer/rules/rules_base.py +++ b/evadb/optimizer/rules/rules_base.py @@ -82,6 +82,7 @@ class RuleType(Flag): LOGICAL_CREATE_INDEX_TO_VECTOR_INDEX = auto() LOGICAL_APPLY_AND_MERGE_TO_PHYSICAL = auto() LOGICAL_VECTOR_INDEX_SCAN_TO_PHYSICAL = auto() + LOGICAL_USE_TO_PHYSICAL = auto() IMPLEMENTATION_DELIMITER = auto() NUM_RULES = auto() @@ -124,6 +125,7 @@ class Promise(IntEnum): LOGICAL_CREATE_INDEX_TO_VECTOR_INDEX = auto() LOGICAL_APPLY_AND_MERGE_TO_PHYSICAL = auto() LOGICAL_VECTOR_INDEX_SCAN_TO_PHYSICAL = auto() + LOGICAL_USE_TO_PHYSICAL = auto() # IMPLEMENTATION DELIMITER IMPLEMENTATION_DELIMITER = auto() diff --git a/evadb/optimizer/rules/rules_manager.py b/evadb/optimizer/rules/rules_manager.py index a8f1053200..cc62bfc1b9 100644 --- a/evadb/optimizer/rules/rules_manager.py +++ b/evadb/optimizer/rules/rules_manager.py @@ -54,6 +54,7 @@ LogicalShowToPhysical, LogicalUnionToPhysical, LogicalVectorIndexScanToPhysical, + LogicalUseToPhysical, PushDownFilterThroughApplyAndMerge, PushDownFilterThroughJoin, ReorderPredicates, @@ -112,6 +113,7 @@ def __init__(self, config: ConfigurationManager): LogicalExplainToPhysical(), LogicalCreateIndexToVectorIndex(), LogicalVectorIndexScanToPhysical(), + LogicalUseToPhysical(), ] # These rules are enabled only if diff --git a/evadb/optimizer/statement_to_opr_converter.py b/evadb/optimizer/statement_to_opr_converter.py index fe1387fd70..7654c8e53a 100644 --- a/evadb/optimizer/statement_to_opr_converter.py +++ b/evadb/optimizer/statement_to_opr_converter.py @@ -36,6 +36,7 @@ LogicalSample, LogicalShow, LogicalUnion, + LogicalUse, ) from evadb.optimizer.optimizer_utils import ( column_definition_to_udf_io, @@ -53,6 +54,7 @@ from evadb.parser.select_statement import SelectStatement from evadb.parser.show_statement import ShowStatement from evadb.parser.statement import AbstractStatement +from evadb.parser.use_statement import UseStatement from evadb.parser.table_ref import TableRef from evadb.parser.types import UDFType from evadb.utils.logging_manager import logger @@ -318,6 +320,13 @@ def visit_delete(self, statement: DeleteTableStatement): ) self._plan = delete_opr + def visit_use(self, statement: UseStatement): + use_opr = LogicalUse( + statement.database_name, + statement.query_string, + ) + self._plan = use_opr + def visit(self, statement: AbstractStatement): """Based on the instance of the statement the corresponding visit is called. @@ -348,6 +357,8 @@ def visit(self, statement: AbstractStatement): self.visit_create_index(statement) elif isinstance(statement, DeleteTableStatement): self.visit_delete(statement) + elif isinstance(statement, UseStatement): + self.visit_use(statement) return self._plan @property diff --git a/evadb/plan_nodes/native_plan.py b/evadb/plan_nodes/native_plan.py new file mode 100644 index 0000000000..f5b37129c5 --- /dev/null +++ b/evadb/plan_nodes/native_plan.py @@ -0,0 +1,49 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from evadb.plan_nodes.abstract_plan import AbstractPlan +from evadb.plan_nodes.types import PlanOprType + + +class NativePlan(AbstractPlan): + """ + This plan is used for pushing down query string directly to + backend database engine. + """ + def __init__(self, plan_type: PlanOprType, database_name: str, query_string: str): + self._database_name = database_name + self._query_string = query_string + super().__init__(plan_type) + + @property + def database_name(self): + return self._database_name + + @property + def query_string(self): + return self._query_string + + def __str__(self): + return "NativePlan(database_name={}, query_string={})".format( + self._database_name, + self._query_string, + ) + + def __hash__(self) -> int: + return hash((super().__hash__(), self._database_name, self._query_string)) + + +class SQLAlchemyPlan(NativePlan): + def __init__(self, database_name: str, query_string: str): + super().__init__(PlanOprType.SQLALCHEMY, database_name, query_string) \ No newline at end of file diff --git a/evadb/plan_nodes/types.py b/evadb/plan_nodes/types.py index 59fab6e1fd..f7e43117e8 100644 --- a/evadb/plan_nodes/types.py +++ b/evadb/plan_nodes/types.py @@ -46,4 +46,6 @@ class PlanOprType(Enum): CREATE_INDEX = auto() APPLY_AND_MERGE = auto() VECTOR_INDEX_SCAN = auto() + NATIVE = auto() + SQLALCHEMY = auto() # add other types diff --git a/test/integration_tests/test_native_executor.py b/test/integration_tests/test_native_executor.py new file mode 100644 index 0000000000..b4ec42091e --- /dev/null +++ b/test/integration_tests/test_native_executor.py @@ -0,0 +1,41 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +from test.util import ( + get_evadb_for_testing, + shutdown_ray, +) + +import pytest + +from evadb.server.command_handler import execute_query_fetch_all +from evadb.utils.logging_manager import logger + + +@pytest.mark.notparallel +class NativeExecutorTest(unittest.TestCase): + def setUp(self): + self.evadb = get_evadb_for_testing() + # reset the catalog manager before running each test + self.evadb.catalog().reset() + + def tearDown(self): + shutdown_ray() + + def test_should_run_simple_query_in_sqlalchemy(self): + execute_query_fetch_all( + self.evadb, + "USE DemoDB (SELECT * FROM table);", + ) From 7e4429b06d6a71c89b9e4d2321429bf323b99aa5 Mon Sep 17 00:00:00 2001 From: Jiashen Cao Date: Wed, 16 Aug 2023 22:27:02 -0400 Subject: [PATCH 3/5] reformat --- evadb/executor/plan_executor.py | 2 +- evadb/executor/sqlalchemy_executor.py | 1 + evadb/optimizer/rules/rules.py | 5 ++--- evadb/optimizer/rules/rules_manager.py | 2 +- evadb/optimizer/statement_to_opr_converter.py | 2 +- evadb/parser/lark_visitor/_use_statement.py | 2 +- evadb/parser/use_statement.py | 7 +++++-- evadb/plan_nodes/native_plan.py | 3 ++- test/integration_tests/test_native_executor.py | 6 +----- test/parser/test_parser.py | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/evadb/executor/plan_executor.py b/evadb/executor/plan_executor.py index ad188276a4..e45da4960e 100644 --- a/evadb/executor/plan_executor.py +++ b/evadb/executor/plan_executor.py @@ -43,10 +43,10 @@ from evadb.executor.sample_executor import SampleExecutor from evadb.executor.seq_scan_executor import SequentialScanExecutor from evadb.executor.show_info_executor import ShowInfoExecutor +from evadb.executor.sqlalchemy_executor import SQLAlchemyExecutor from evadb.executor.storage_executor import StorageExecutor from evadb.executor.union_executor import UnionExecutor from evadb.executor.vector_index_scan_executor import VectorIndexScanExecutor -from evadb.executor.sqlalchemy_executor import SQLAlchemyExecutor from evadb.models.storage.batch import Batch from evadb.parser.create_statement import CreateDatabaseStatement from evadb.parser.statement import AbstractStatement diff --git a/evadb/executor/sqlalchemy_executor.py b/evadb/executor/sqlalchemy_executor.py index b5a3c1b8e4..40dfd19857 100644 --- a/evadb/executor/sqlalchemy_executor.py +++ b/evadb/executor/sqlalchemy_executor.py @@ -33,4 +33,5 @@ def __init__(self, db: EvaDBDatabase, node: SQLAlchemyPlan): def exec(self, *args, **kwargs) -> Iterator[Batch]: print(self._query_string) import pandas as pd + yield Batch(pd.DataFrame({"status": ["Ok"]})) diff --git a/evadb/optimizer/rules/rules.py b/evadb/optimizer/rules/rules.py index a2325f2f04..4f51c5bc7b 100644 --- a/evadb/optimizer/rules/rules.py +++ b/evadb/optimizer/rules/rules.py @@ -47,7 +47,6 @@ from evadb.plan_nodes.predicate_plan import PredicatePlan from evadb.plan_nodes.project_plan import ProjectPlan from evadb.plan_nodes.show_info_plan import ShowInfoPlan -from evadb.plan_nodes.native_plan import SQLAlchemyPlan if TYPE_CHECKING: from evadb.optimizer.optimizer_context import OptimizerContext @@ -78,8 +77,8 @@ LogicalSample, LogicalShow, LogicalUnion, - LogicalVectorIndexScan, LogicalUse, + LogicalVectorIndexScan, Operator, OperatorType, ) @@ -95,13 +94,13 @@ from evadb.plan_nodes.lateral_join_plan import LateralJoinPlan from evadb.plan_nodes.limit_plan import LimitPlan from evadb.plan_nodes.load_data_plan import LoadDataPlan +from evadb.plan_nodes.native_plan import SQLAlchemyPlan from evadb.plan_nodes.orderby_plan import OrderByPlan from evadb.plan_nodes.rename_plan import RenamePlan from evadb.plan_nodes.seq_scan_plan import SeqScanPlan from evadb.plan_nodes.storage_plan import StoragePlan from evadb.plan_nodes.union_plan import UnionPlan from evadb.plan_nodes.vector_index_scan_plan import VectorIndexScanPlan -from evadb.plan_nodes.native_plan import SQLAlchemyPlan ############################################## # REWRITE RULES START diff --git a/evadb/optimizer/rules/rules_manager.py b/evadb/optimizer/rules/rules_manager.py index cc62bfc1b9..df258716c0 100644 --- a/evadb/optimizer/rules/rules_manager.py +++ b/evadb/optimizer/rules/rules_manager.py @@ -53,8 +53,8 @@ LogicalRenameToPhysical, LogicalShowToPhysical, LogicalUnionToPhysical, - LogicalVectorIndexScanToPhysical, LogicalUseToPhysical, + LogicalVectorIndexScanToPhysical, PushDownFilterThroughApplyAndMerge, PushDownFilterThroughJoin, ReorderPredicates, diff --git a/evadb/optimizer/statement_to_opr_converter.py b/evadb/optimizer/statement_to_opr_converter.py index 7654c8e53a..e7867ade6d 100644 --- a/evadb/optimizer/statement_to_opr_converter.py +++ b/evadb/optimizer/statement_to_opr_converter.py @@ -54,9 +54,9 @@ from evadb.parser.select_statement import SelectStatement from evadb.parser.show_statement import ShowStatement from evadb.parser.statement import AbstractStatement -from evadb.parser.use_statement import UseStatement from evadb.parser.table_ref import TableRef from evadb.parser.types import UDFType +from evadb.parser.use_statement import UseStatement from evadb.utils.logging_manager import logger diff --git a/evadb/parser/lark_visitor/_use_statement.py b/evadb/parser/lark_visitor/_use_statement.py index 756349c644..dda5e446a3 100644 --- a/evadb/parser/lark_visitor/_use_statement.py +++ b/evadb/parser/lark_visitor/_use_statement.py @@ -25,4 +25,4 @@ def use_statement(self, tree): database_name = self.visit(child) if child.data == "query_string": query_string = self.visit(child) - return UseStatement(database_name, query_string) \ No newline at end of file + return UseStatement(database_name, query_string) diff --git a/evadb/parser/use_statement.py b/evadb/parser/use_statement.py index 4945925913..c0b1c5e9c0 100644 --- a/evadb/parser/use_statement.py +++ b/evadb/parser/use_statement.py @@ -38,12 +38,15 @@ def __str__(self): def __eq__(self, other: object) -> bool: if not isinstance(other, UseStatement): return False - return self.database_name == other.database_name and self.query_string == other.query_string + return ( + self.database_name == other.database_name + and self.query_string == other.query_string + ) def __hash__(self) -> int: return hash( ( - super().__hash__(), + super().__hash__(), self.database_name, self.query_string, ) diff --git a/evadb/plan_nodes/native_plan.py b/evadb/plan_nodes/native_plan.py index f5b37129c5..832a6bf971 100644 --- a/evadb/plan_nodes/native_plan.py +++ b/evadb/plan_nodes/native_plan.py @@ -21,6 +21,7 @@ class NativePlan(AbstractPlan): This plan is used for pushing down query string directly to backend database engine. """ + def __init__(self, plan_type: PlanOprType, database_name: str, query_string: str): self._database_name = database_name self._query_string = query_string @@ -46,4 +47,4 @@ def __hash__(self) -> int: class SQLAlchemyPlan(NativePlan): def __init__(self, database_name: str, query_string: str): - super().__init__(PlanOprType.SQLALCHEMY, database_name, query_string) \ No newline at end of file + super().__init__(PlanOprType.SQLALCHEMY, database_name, query_string) diff --git a/test/integration_tests/test_native_executor.py b/test/integration_tests/test_native_executor.py index b4ec42091e..4ea5394ab8 100644 --- a/test/integration_tests/test_native_executor.py +++ b/test/integration_tests/test_native_executor.py @@ -13,15 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. import unittest -from test.util import ( - get_evadb_for_testing, - shutdown_ray, -) +from test.util import get_evadb_for_testing, shutdown_ray import pytest from evadb.server.command_handler import execute_query_fetch_all -from evadb.utils.logging_manager import logger @pytest.mark.notparallel diff --git a/test/parser/test_parser.py b/test/parser/test_parser.py index 2e5386aa8b..e1fbc55e11 100644 --- a/test/parser/test_parser.py +++ b/test/parser/test_parser.py @@ -36,7 +36,6 @@ from evadb.parser.parser import Parser from evadb.parser.rename_statement import RenameTableStatement from evadb.parser.select_statement import SelectStatement -from evadb.parser.use_statement import UseStatement from evadb.parser.statement import AbstractStatement, StatementType from evadb.parser.table_ref import JoinNode, TableInfo, TableRef, TableValuedExpression from evadb.parser.types import ( @@ -45,6 +44,7 @@ ObjectType, ParserOrderBySortType, ) +from evadb.parser.use_statement import UseStatement class ParserTests(unittest.TestCase): From 1ccd3c904b97b7a98701605c9e8c13fef481c406 Mon Sep 17 00:00:00 2001 From: Jiashen Cao Date: Thu, 17 Aug 2023 14:10:09 -0400 Subject: [PATCH 4/5] update integration test --- evadb/catalog/catalog_utils.py | 9 +++ evadb/executor/sqlalchemy_executor.py | 23 +++++++- evadb/parser/evadb.lark | 4 +- .../integration_tests/test_native_executor.py | 57 ++++++++++++++++++- test/parser/test_parser.py | 3 +- 5 files changed, 88 insertions(+), 8 deletions(-) diff --git a/evadb/catalog/catalog_utils.py b/evadb/catalog/catalog_utils.py index d3fe528104..8be6648976 100644 --- a/evadb/catalog/catalog_utils.py +++ b/evadb/catalog/catalog_utils.py @@ -33,12 +33,21 @@ ) from evadb.catalog.sql_config import IDENTIFIER_COLUMN from evadb.configuration.configuration_manager import ConfigurationManager +from evadb.executor.executor_utils import ExecutorError from evadb.expression.function_expression import FunctionExpression from evadb.expression.tuple_value_expression import TupleValueExpression from evadb.parser.create_statement import ColConstraintInfo, ColumnDefinition from evadb.utils.generic_utils import get_str_hash, remove_directory_contents +def generate_sqlalchemy_conn_str(engine: str, params: Dict[str, str]): + if engine == "postgres": + conn_str = f"""postgresql://{params["user"]}:{params["password"]}@{params["host"]}:{params["port"]}/{params["database"]}""" + else: + raise ExecutorError(f"Native engine: {engine} is not currently supported") + return conn_str + + def is_video_table(table: TableCatalogEntry): return table.table_type == TableType.VIDEO_DATA diff --git a/evadb/executor/sqlalchemy_executor.py b/evadb/executor/sqlalchemy_executor.py index 40dfd19857..0847e42799 100644 --- a/evadb/executor/sqlalchemy_executor.py +++ b/evadb/executor/sqlalchemy_executor.py @@ -14,6 +14,10 @@ # limitations under the License. from typing import Iterator +import pandas as pd +from sqlalchemy import create_engine + +from evadb.catalog.catalog_utils import generate_sqlalchemy_conn_str from evadb.database import EvaDBDatabase from evadb.executor.abstract_executor import AbstractExecutor from evadb.models.storage.batch import Batch @@ -31,7 +35,20 @@ def __init__(self, db: EvaDBDatabase, node: SQLAlchemyPlan): self._query_string = node.query_string def exec(self, *args, **kwargs) -> Iterator[Batch]: - print(self._query_string) - import pandas as pd + db_catalog_entry = self.db.catalog().get_database_catalog_entry( + self._database_name + ) + + conn_str = generate_sqlalchemy_conn_str( + db_catalog_entry.engine, + db_catalog_entry.params, + ) + + engine = create_engine(conn_str) - yield Batch(pd.DataFrame({"status": ["Ok"]})) + with engine.connect() as con: + if "SELECT" in self._query_string or "select" in self._query_string: + yield Batch(pd.read_sql(self._query_string, engine)) + else: + con.execute(self._query_string) + yield Batch(pd.DataFrame({"status": ["Ok"]})) diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index bf34ff77b0..76e50c67bc 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -176,7 +176,7 @@ explainable_statement : select_statement | insert_statement | update_statement | // Context Statements -use_statement: USE database_name "(" query_string ")" // One shortcoming that query string cannot have parenthesis +use_statement: USE database_name "{" query_string "}" // One shortcoming that query string cannot have parenthesis // Common Clauses @@ -567,7 +567,7 @@ ID_LITERAL: /[A-Za-z_$0-9]*?[A-Za-z_$]+?[A-Za-z_$0-9]*/ DQUOTA_STRING: /"[^";]*"/ SQUOTA_STRING: /'[^';]*'/ BQUOTA_STRING: /`[^'`]*`/ -QUERY_STRING: /[^();]+/ +QUERY_STRING: /[^{};]+/ DEC_DIGIT: /[0-9]/ // LARK diff --git a/test/integration_tests/test_native_executor.py b/test/integration_tests/test_native_executor.py index 4ea5394ab8..2139c388a6 100644 --- a/test/integration_tests/test_native_executor.py +++ b/test/integration_tests/test_native_executor.py @@ -31,7 +31,62 @@ def tearDown(self): shutdown_ray() def test_should_run_simple_query_in_sqlalchemy(self): + # Create database. + params = { + "user": "eva", + "password": "password", + "host": "127.0.0.1", + "port": "5432", + "database": "test", + } + query = """CREATE DATABASE postgres_data + WITH ENGINE = "postgres", + PARAMETERS = {};""".format( + params + ) + execute_query_fetch_all(self.evadb, query) + + # Create table. + execute_query_fetch_all( + self.evadb, + """USE postgres_data { + CREATE TABLE test_table ( + name VARCHAR(10), + age INT, + comment VARCHAR(100) + ) + };""", + ) + execute_query_fetch_all( + self.evadb, + """USE postgres_data { + INSERT INTO test_table ( + name, age, comment + ) VALUES ( + 'aa', 1, 'aaaa' + ) + } + """, + ) + + # Select. + res_batch = execute_query_fetch_all( + self.evadb, + """USE postgres_data { + SELECT * FROM test_table + } + """, + ) + self.assertEqual(len(res_batch), 1) + self.assertEqual(res_batch.frames["name"][0], "aa") + self.assertEqual(res_batch.frames["age"][0], 1) + self.assertEqual(res_batch.frames["comment"][0], "aaaa") + + # DROP table. execute_query_fetch_all( self.evadb, - "USE DemoDB (SELECT * FROM table);", + """USE postgres_data { + DROP TABLE test_table + } + """, ) diff --git a/test/parser/test_parser.py b/test/parser/test_parser.py index e1fbc55e11..ffbd60a9dc 100644 --- a/test/parser/test_parser.py +++ b/test/parser/test_parser.py @@ -63,7 +63,7 @@ def test_use_statement(self): ] for query in query_list: - use_query = f"USE DemoDB ({query});" + use_query = f"USE DemoDB {{{query}}};" evadb_stmt_list = parser.parse(use_query) # check stmt itself @@ -73,7 +73,6 @@ def test_use_statement(self): expected_stmt = UseStatement("DemoDB", query) actual_stmt = evadb_stmt_list[0] - print(actual_stmt.query_string, expected_stmt.query_string) self.assertEqual(actual_stmt, expected_stmt) def test_create_index_statement(self): From a946a96a8af5c5698879232f2d5a135bd36bef38 Mon Sep 17 00:00:00 2001 From: Jiashen Cao Date: Fri, 18 Aug 2023 09:29:32 -0400 Subject: [PATCH 5/5] skip binder and optimizer for use --- evadb/executor/plan_executor.py | 7 ++-- ...sqlalchemy_executor.py => use_executor.py} | 6 +--- evadb/optimizer/operators.py | 34 ------------------- evadb/optimizer/rules/rules.py | 18 ---------- evadb/optimizer/rules/rules_base.py | 2 -- evadb/optimizer/rules/rules_manager.py | 2 -- evadb/optimizer/statement_to_opr_converter.py | 11 ------ evadb/parser/utils.py | 3 +- 8 files changed, 7 insertions(+), 76 deletions(-) rename evadb/executor/{sqlalchemy_executor.py => use_executor.py} (94%) diff --git a/evadb/executor/plan_executor.py b/evadb/executor/plan_executor.py index e45da4960e..c593d2002d 100644 --- a/evadb/executor/plan_executor.py +++ b/evadb/executor/plan_executor.py @@ -43,13 +43,14 @@ from evadb.executor.sample_executor import SampleExecutor from evadb.executor.seq_scan_executor import SequentialScanExecutor from evadb.executor.show_info_executor import ShowInfoExecutor -from evadb.executor.sqlalchemy_executor import SQLAlchemyExecutor from evadb.executor.storage_executor import StorageExecutor from evadb.executor.union_executor import UnionExecutor +from evadb.executor.use_executor import UseExecutor from evadb.executor.vector_index_scan_executor import VectorIndexScanExecutor from evadb.models.storage.batch import Batch from evadb.parser.create_statement import CreateDatabaseStatement from evadb.parser.statement import AbstractStatement +from evadb.parser.use_statement import UseStatement from evadb.plan_nodes.abstract_plan import AbstractPlan from evadb.plan_nodes.types import PlanOprType from evadb.utils.logging_manager import logger @@ -87,6 +88,8 @@ def _build_execution_tree( # First handle cases when the plan is actually a parser statement if isinstance(plan, CreateDatabaseStatement): return CreateDatabaseExecutor(db=self._db, node=plan) + elif isinstance(plan, UseStatement): + return UseExecutor(db=self._db, node=plan) # Get plan node type plan_opr_type = plan.opr_type @@ -153,8 +156,6 @@ def _build_execution_tree( executor_node = VectorIndexScanExecutor(db=self._db, node=plan) elif plan_opr_type == PlanOprType.DELETE: executor_node = DeleteExecutor(db=self._db, node=plan) - elif plan_opr_type == PlanOprType.SQLALCHEMY: - executor_node = SQLAlchemyExecutor(db=self._db, node=plan) # EXPLAIN does not need to build execution tree for its children if plan_opr_type != PlanOprType.EXPLAIN: diff --git a/evadb/executor/sqlalchemy_executor.py b/evadb/executor/use_executor.py similarity index 94% rename from evadb/executor/sqlalchemy_executor.py rename to evadb/executor/use_executor.py index 0847e42799..e71f1dab3d 100644 --- a/evadb/executor/sqlalchemy_executor.py +++ b/evadb/executor/use_executor.py @@ -24,11 +24,7 @@ from evadb.plan_nodes.native_plan import SQLAlchemyPlan -class SQLAlchemyExecutor(AbstractExecutor): - """ - Execute SQL through SQLAlchemy engine. - """ - +class UseExecutor(AbstractExecutor): def __init__(self, db: EvaDBDatabase, node: SQLAlchemyPlan): super().__init__(db, node) self._database_name = node.database_name diff --git a/evadb/optimizer/operators.py b/evadb/optimizer/operators.py index ef175ad313..edfa2d100f 100644 --- a/evadb/optimizer/operators.py +++ b/evadb/optimizer/operators.py @@ -1240,37 +1240,3 @@ def __hash__(self) -> int: self.search_query_expr, ) ) - - -class LogicalUse(Operator): - def __init__(self, database_name: str, query_string: str, children: List = None): - super().__init__(OperatorType.LOGICAL_USE, children) - self._database_name = database_name - self._query_string = query_string - - @property - def database_name(self): - return self._database_name - - @property - def query_string(self): - return self._query_string - - def __eq__(self, other): - is_subtree_equal = super().__eq__(other) - if not isinstance(other, LogicalUse): - return False - return ( - is_subtree_equal - and self.database_name == other.database_name - and self.query_string == other.query_string - ) - - def __hash__(self) -> int: - return hash( - ( - super().__hash__(), - self.database_name, - self.query_string, - ) - ) diff --git a/evadb/optimizer/rules/rules.py b/evadb/optimizer/rules/rules.py index 4f51c5bc7b..3c5f39c3a6 100644 --- a/evadb/optimizer/rules/rules.py +++ b/evadb/optimizer/rules/rules.py @@ -77,7 +77,6 @@ LogicalSample, LogicalShow, LogicalUnion, - LogicalUse, LogicalVectorIndexScan, Operator, OperatorType, @@ -94,7 +93,6 @@ from evadb.plan_nodes.lateral_join_plan import LateralJoinPlan from evadb.plan_nodes.limit_plan import LimitPlan from evadb.plan_nodes.load_data_plan import LoadDataPlan -from evadb.plan_nodes.native_plan import SQLAlchemyPlan from evadb.plan_nodes.orderby_plan import OrderByPlan from evadb.plan_nodes.rename_plan import RenamePlan from evadb.plan_nodes.seq_scan_plan import SeqScanPlan @@ -1333,21 +1331,5 @@ def apply(self, before: LogicalProject, context: OptimizerContext): yield exchange_plan -class LogicalUseToPhysical(Rule): - def __init__(self): - pattern = Pattern(OperatorType.LOGICAL_USE) - super().__init__(RuleType.LOGICAL_USE_TO_PHYSICAL, pattern) - - def promise(self): - return Promise.LOGICAL_USE_TO_PHYSICAL - - def check(self, before: LogicalUse, context: OptimizerContext): - return True - - def apply(self, before: LogicalUse, context: OptimizerContext): - # TODO: convert to different physical plan based on USE operator - yield SQLAlchemyPlan(before.database_name, before.query_string) - - # IMPLEMENTATION RULES END ############################################## diff --git a/evadb/optimizer/rules/rules_base.py b/evadb/optimizer/rules/rules_base.py index 00fb0de9ad..c2a2907bca 100644 --- a/evadb/optimizer/rules/rules_base.py +++ b/evadb/optimizer/rules/rules_base.py @@ -82,7 +82,6 @@ class RuleType(Flag): LOGICAL_CREATE_INDEX_TO_VECTOR_INDEX = auto() LOGICAL_APPLY_AND_MERGE_TO_PHYSICAL = auto() LOGICAL_VECTOR_INDEX_SCAN_TO_PHYSICAL = auto() - LOGICAL_USE_TO_PHYSICAL = auto() IMPLEMENTATION_DELIMITER = auto() NUM_RULES = auto() @@ -125,7 +124,6 @@ class Promise(IntEnum): LOGICAL_CREATE_INDEX_TO_VECTOR_INDEX = auto() LOGICAL_APPLY_AND_MERGE_TO_PHYSICAL = auto() LOGICAL_VECTOR_INDEX_SCAN_TO_PHYSICAL = auto() - LOGICAL_USE_TO_PHYSICAL = auto() # IMPLEMENTATION DELIMITER IMPLEMENTATION_DELIMITER = auto() diff --git a/evadb/optimizer/rules/rules_manager.py b/evadb/optimizer/rules/rules_manager.py index df258716c0..a8f1053200 100644 --- a/evadb/optimizer/rules/rules_manager.py +++ b/evadb/optimizer/rules/rules_manager.py @@ -53,7 +53,6 @@ LogicalRenameToPhysical, LogicalShowToPhysical, LogicalUnionToPhysical, - LogicalUseToPhysical, LogicalVectorIndexScanToPhysical, PushDownFilterThroughApplyAndMerge, PushDownFilterThroughJoin, @@ -113,7 +112,6 @@ def __init__(self, config: ConfigurationManager): LogicalExplainToPhysical(), LogicalCreateIndexToVectorIndex(), LogicalVectorIndexScanToPhysical(), - LogicalUseToPhysical(), ] # These rules are enabled only if diff --git a/evadb/optimizer/statement_to_opr_converter.py b/evadb/optimizer/statement_to_opr_converter.py index e7867ade6d..fe1387fd70 100644 --- a/evadb/optimizer/statement_to_opr_converter.py +++ b/evadb/optimizer/statement_to_opr_converter.py @@ -36,7 +36,6 @@ LogicalSample, LogicalShow, LogicalUnion, - LogicalUse, ) from evadb.optimizer.optimizer_utils import ( column_definition_to_udf_io, @@ -56,7 +55,6 @@ from evadb.parser.statement import AbstractStatement from evadb.parser.table_ref import TableRef from evadb.parser.types import UDFType -from evadb.parser.use_statement import UseStatement from evadb.utils.logging_manager import logger @@ -320,13 +318,6 @@ def visit_delete(self, statement: DeleteTableStatement): ) self._plan = delete_opr - def visit_use(self, statement: UseStatement): - use_opr = LogicalUse( - statement.database_name, - statement.query_string, - ) - self._plan = use_opr - def visit(self, statement: AbstractStatement): """Based on the instance of the statement the corresponding visit is called. @@ -357,8 +348,6 @@ def visit(self, statement: AbstractStatement): self.visit_create_index(statement) elif isinstance(statement, DeleteTableStatement): self.visit_delete(statement) - elif isinstance(statement, UseStatement): - self.visit_use(statement) return self._plan @property diff --git a/evadb/parser/utils.py b/evadb/parser/utils.py index aa1b1dd349..a4f0dec91f 100644 --- a/evadb/parser/utils.py +++ b/evadb/parser/utils.py @@ -23,10 +23,11 @@ from evadb.parser.select_statement import SelectStatement from evadb.parser.show_statement import ShowStatement from evadb.parser.types import ObjectType +from evadb.parser.use_statement import UseStatement # List of statements for which we omit binder and optimizer and pass the statement # directly to the executor. -SKIP_BINDER_AND_OPTIMIZER_STATEMENTS = (CreateDatabaseStatement,) +SKIP_BINDER_AND_OPTIMIZER_STATEMENTS = (CreateDatabaseStatement, UseStatement) def parse_expression(expr: str):