From 5fad0f4b91aff2f4a03b981b0485863880038a00 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 15 Aug 2023 00:01:03 -0400 Subject: [PATCH 1/7] create database parser added --- evadb/parser/create_statement.py | 39 +++++++++++++++++++ evadb/parser/evadb.lark | 12 +++++- evadb/parser/lark_visitor/__init__.py | 3 +- .../parser/lark_visitor/_create_statements.py | 35 +++++++++++++++++ evadb/parser/lark_visitor/_expressions.py | 10 +++++ evadb/parser/types.py | 1 + test/parser/test_parser_statements.py | 10 +++++ 7 files changed, 108 insertions(+), 2 deletions(-) diff --git a/evadb/parser/create_statement.py b/evadb/parser/create_statement.py index c47fb7ad24..c3285ecd42 100644 --- a/evadb/parser/create_statement.py +++ b/evadb/parser/create_statement.py @@ -183,3 +183,42 @@ def __hash__(self) -> int: self.query, ) ) + + +class CreateDatabaseStatement(AbstractStatement): + def __init__( + self, database_name: str, if_not_exists: bool, engine: str, param_list: dict + ): + super().__init__(StatementType.CREATE_DATABASE) + self.database_name = database_name + self.if_not_exists = if_not_exists + self.engine = engine + self.param_list = param_list + + def __eq__(self, other): + if not isinstance(other, CreateDatabaseStatement): + return False + return ( + self.database_name == other.database_name + and self.if_not_exists == other.if_not_exists + and self.engine == other.engine + and self.param_list == other.param_list + ) + + def __hash__(self) -> int: + return hash( + ( + super().__hash__(), + self.database_name, + self.if_not_exists, + self.engine, + hash(frozenset(self.param_list.items())), + ) + ) + + def __str__(self) -> str: + return ( + f"CREATE DATABASE {self.database_name} \n" + f"WITH ENGINE '{self.engine}' , \n" + f"PARAMETERS = {self.param_list};" + ) diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index 53a08746ce..f041016b73 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -16,7 +16,13 @@ utility_statement: describe_statement | show_statement | help_statement | explai // Create statements -create_database: CREATE DATABASE if_not_exists? uid +create_database: CREATE DATABASE if_not_exists? uid create_database_engine_clause + +colon_param: string_literal ":" string_literal + +colon_param_dict: LR_CURLY_BRACKET colon_param ("," colon_param)* RR_CURLY_BRACKET + +create_database_engine_clause: WITH ENGINE "=" string_literal "," PARAMETERS "=" colon_param_dict create_index: CREATE INDEX uid ON table_name index_elem vector_store_type? @@ -320,6 +326,7 @@ DESC: "DESC"i DESCRIBE: "DESCRIBE"i DISTINCT: "DISTINCT"i DROP: "DROP"i +ENGINE: "ENGINE"i EXIT: "EXIT"i EXISTS: "EXISTS"i EXPLAIN: "EXPLAIN"i @@ -349,6 +356,7 @@ ON: "ON"i OR: "OR"i ORDER: "ORDER"i PATH: "PATH"i +PARAMETERS: "PARAMETERS"i PRIMARY: "PRIMARY"i REFERENCES: "REFERENCES"i RENAME: "RENAME"i @@ -501,6 +509,8 @@ LR_BRACKET: "(" RR_BRACKET: ")" LR_SQ_BRACKET: "[" RR_SQ_BRACKET: "]" +LR_CURLY_BRACKET: "{" +RR_CURLY_BRACKET: "}" COMMA: "," SEMI: ";" AT_SIGN: "@" diff --git a/evadb/parser/lark_visitor/__init__.py b/evadb/parser/lark_visitor/__init__.py index 3bfa1e50b0..c27cc6a6a1 100644 --- a/evadb/parser/lark_visitor/__init__.py +++ b/evadb/parser/lark_visitor/__init__.py @@ -17,7 +17,7 @@ from lark import Tree, visitors from evadb.parser.lark_visitor._common_clauses_ids import CommonClauses -from evadb.parser.lark_visitor._create_statements import CreateTable +from evadb.parser.lark_visitor._create_statements import CreateDatabase, CreateTable from evadb.parser.lark_visitor._delete_statement import Delete from evadb.parser.lark_visitor._drop_statement import DropObject from evadb.parser.lark_visitor._explain_statement import Explain @@ -58,6 +58,7 @@ class LarkInterpreter( LarkBaseInterpreter, CommonClauses, CreateTable, + CreateDatabase, Expressions, Functions, Insert, diff --git a/evadb/parser/lark_visitor/_create_statements.py b/evadb/parser/lark_visitor/_create_statements.py index 15874eaefb..ad3aa892cf 100644 --- a/evadb/parser/lark_visitor/_create_statements.py +++ b/evadb/parser/lark_visitor/_create_statements.py @@ -21,6 +21,7 @@ from evadb.parser.create_statement import ( ColConstraintInfo, ColumnDefinition, + CreateDatabaseStatement, CreateTableStatement, ) from evadb.parser.table_ref import TableRef @@ -277,3 +278,37 @@ def create_index(self, tree): return CreateIndexStatement( index_name, table_ref, col_list, vector_store_type, udf_func ) + + +class CreateDatabase: + def create_database(self, tree): + database_name = None + if_not_exists = False + engine = None + param_list = [] + + for child in tree.children: + if isinstance(child, Tree): + if child.data == "if_not_exists": + if_not_exists = True + elif child.data == "uid": + database_name = self.visit(child) + elif child.data == "create_database_engine_clause": + engine, param_list = self.visit(child) + + create_stmt = CreateDatabaseStatement( + database_name, if_not_exists, engine, param_list + ) + return create_stmt + + def create_database_engine_clause(self, tree): + engine = None + param_list = [] + for child in tree.children: + if isinstance(child, Tree): + if child.data == "uid": + engine = self.visit(child) + elif child.data == "colon_param_dict": + param_list = self.visit(child) + + return engine, param_list diff --git a/evadb/parser/lark_visitor/_expressions.py b/evadb/parser/lark_visitor/_expressions.py index aebc5a36b7..3f8d5880e7 100644 --- a/evadb/parser/lark_visitor/_expressions.py +++ b/evadb/parser/lark_visitor/_expressions.py @@ -154,3 +154,13 @@ def chunk_params(self, tree): } else: assert f"incorrect keyword found {chunk_params[0]}" + + def colon_param_dict(self, tree): + param_dict = [] + for child in tree.children: + if isinstance(child, Tree): + if child.data == "colon_param": + key = child.value[0] + value = child.value[2] + param_dict[key] = value + return param_dict diff --git a/evadb/parser/types.py b/evadb/parser/types.py index 14e665f5a9..406322c9f6 100644 --- a/evadb/parser/types.py +++ b/evadb/parser/types.py @@ -39,6 +39,7 @@ class StatementType(EvaDBEnum): SHOW # noqa: F821 EXPLAIN # noqa: F821 CREATE_INDEX # noqa: F821 + CREATE_DATABASE # noqa: F821 # add other types diff --git a/test/parser/test_parser_statements.py b/test/parser/test_parser_statements.py index 25e3847d17..513d2e6ea7 100644 --- a/test/parser/test_parser_statements.py +++ b/test/parser/test_parser_statements.py @@ -139,6 +139,16 @@ def test_parser_statement_types(self): Type FaceDetection Impl 'evadb/udfs/face_detector.py'; """, + """CREATE DATABASE example_db + WITH ENGINE = "postgres", + PARAMETERS = { + "user": "demo_user", + "password": "demo_password", + "host": "3.220.66.106", + "port": "5432", + "database": "demo" + }; + """, ] queries = queries + randomized_cases ref_stmt = parser.parse(queries[0])[0] From 291038d77a84db0a17a2b2aad14fd82dea207634 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 15 Aug 2023 15:43:15 -0400 Subject: [PATCH 2/7] persist database info int the catalog --- evadb/catalog/models/utils.py | 40 ++++++++++++++++++++++ evadb/executor/create_database_executor.py | 30 ++++++++++++++++ evadb/executor/plan_executor.py | 6 ++++ evadb/parser/utils.py | 6 +++- evadb/server/command_handler.py | 14 ++++++-- 5 files changed, 92 insertions(+), 4 deletions(-) create mode 100644 evadb/executor/create_database_executor.py diff --git a/evadb/catalog/models/utils.py b/evadb/catalog/models/utils.py index b831b866fa..e20d0da5f1 100644 --- a/evadb/catalog/models/utils.py +++ b/evadb/catalog/models/utils.py @@ -13,11 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. import contextlib +import json from dataclasses import dataclass, field from typing import List, Tuple import sqlalchemy from sqlalchemy.engine import Engine +from sqlalchemy.types import TypeDecorator from sqlalchemy_utils import create_database, database_exists from evadb.catalog.catalog_type import ( @@ -31,6 +33,25 @@ from evadb.utils.logging_manager import logger +class TextPickleType(TypeDecorator): + """Used to handle serialization and deserialization to Text + https://stackoverflow.com/questions/1378325/python-dicts-in-sqlalchemy + """ + + impl = sqlalchemy.Text(1024) + + def process_bind_param(self, value, dialect): + if value is not None: + value = json.dumps(value) + + return value + + def process_result_value(self, value, dialect): + if value is not None: + value = json.loads(value) + return value + + def init_db(engine: Engine): """Create database if doesn't exist and create all tables.""" if not database_exists(engine.url): @@ -209,3 +230,22 @@ def _to_str(col): "impl": self.impl_file_path, "metadata": self.metadata, } + + +@dataclass(unsafe_hash=True) +class DatabaseCatalogEntry: + """Dataclass representing an entry in the `DatabaseCatalog`. + This is done to ensure we don't expose the sqlalchemy dependencies beyond catalog service. Further, sqlalchemy does not allow sharing of objects across threads. + """ + + name: str + engine: str + params: dict + row_id: int = None + + def display_format(self): + return { + "name": self.name, + "engine": self.engine, + "params": self.params, + } diff --git a/evadb/executor/create_database_executor.py b/evadb/executor/create_database_executor.py new file mode 100644 index 0000000000..754d79cc16 --- /dev/null +++ b/evadb/executor/create_database_executor.py @@ -0,0 +1,30 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from evadb.database import EvaDBDatabase +from evadb.executor.abstract_executor import AbstractExecutor +from evadb.parser.create_statement import CreateDatabaseStatement +from evadb.storage.storage_engine import StorageEngine +from evadb.utils.logging_manager import logger + + +class CreateDatabaseExecutor(AbstractExecutor): + def __init__(self, db: EvaDBDatabase, node: CreateDatabaseStatement): + super().__init__(db, node) + + def exec(self, *args, **kwargs): + self.node.database_name + self.node.if_not_exists + self.node.engine + self.node.param_list diff --git a/evadb/executor/plan_executor.py b/evadb/executor/plan_executor.py index b5ebbda3f7..9938bc29fb 100644 --- a/evadb/executor/plan_executor.py +++ b/evadb/executor/plan_executor.py @@ -17,6 +17,7 @@ from evadb.database import EvaDBDatabase from evadb.executor.abstract_executor import AbstractExecutor from evadb.executor.apply_and_merge_executor import ApplyAndMergeExecutor +from evadb.executor.create_database_executor import CreateDatabaseExecutor from evadb.executor.create_executor import CreateExecutor from evadb.executor.create_index_executor import CreateIndexExecutor from evadb.executor.create_udf_executor import CreateUDFExecutor @@ -46,6 +47,7 @@ from evadb.executor.union_executor import UnionExecutor from evadb.executor.vector_index_scan_executor import VectorIndexScanExecutor from evadb.models.storage.batch import Batch +from evadb.parser.create_statement import CreateDatabaseStatement from evadb.plan_nodes.abstract_plan import AbstractPlan from evadb.plan_nodes.types import PlanOprType from evadb.utils.logging_manager import logger @@ -81,6 +83,10 @@ def _build_execution_tree(self, plan: AbstractPlan) -> AbstractExecutor: # Get plan node type plan_opr_type = plan.opr_type + # First handle cases when the plan is actually a parser statement + if isinstance(plan_opr_type, CreateDatabaseStatement): + return CreateDatabaseExecutor(db=self._db, node=plan) + if plan_opr_type == PlanOprType.SEQUENTIAL_SCAN: executor_node = SequentialScanExecutor(db=self._db, node=plan) elif plan_opr_type == PlanOprType.UNION: diff --git a/evadb/parser/utils.py b/evadb/parser/utils.py index bf877555da..3598a6206a 100644 --- a/evadb/parser/utils.py +++ b/evadb/parser/utils.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from evadb.parser.create_statement import CreateTableStatement +from evadb.parser.create_statement import CreateDatabaseStatement, CreateTableStatement from evadb.parser.create_udf_statement import CreateUDFStatement from evadb.parser.drop_object_statement import DropObjectStatement from evadb.parser.explain_statement import ExplainStatement @@ -24,6 +24,10 @@ from evadb.parser.show_statement import ShowStatement from evadb.parser.types import ObjectType +# List of statements for which we omit binder and optimizer and pass the statement +# directly to the executor. +SKIP_BINDER_AND_OPTIMIZER_STATEMENTS = [CreateDatabaseStatement] + def parse_expression(expr: str): mock_query = f"SELECT {expr} FROM DUMMY;" diff --git a/evadb/server/command_handler.py b/evadb/server/command_handler.py index 46b8a12d17..d9d669b8ad 100644 --- a/evadb/server/command_handler.py +++ b/evadb/server/command_handler.py @@ -23,6 +23,7 @@ from evadb.optimizer.plan_generator import PlanGenerator from evadb.optimizer.statement_to_opr_converter import StatementToPlanConverter from evadb.parser.parser import Parser +from evadb.parser.utils import SKIP_BINDER_AND_OPTIMIZER_STATEMENTS from evadb.utils.logging_manager import logger from evadb.utils.stats import Timer @@ -42,9 +43,16 @@ def execute_query( plan_generator = kwargs.pop("plan_generator", PlanGenerator(evadb)) with query_compile_time: stmt = Parser().parse(query)[0] - StatementBinder(StatementBinderContext(evadb.catalog)).bind(stmt) - l_plan = StatementToPlanConverter().visit(stmt) - p_plan = plan_generator.build(l_plan) + + # For certain statements, we plan to omit binder and optimizer to keep the code + # clean. So, we handle such cases here and pass the statement directly to the + # executor. + if stmt not in SKIP_BINDER_AND_OPTIMIZER_STATEMENTS: + StatementBinder(StatementBinderContext(evadb.catalog)).bind(stmt) + l_plan = StatementToPlanConverter().visit(stmt) + p_plan = plan_generator.build(l_plan) + else: + p_plan = stmt output = PlanExecutor(evadb, p_plan).execute_plan( do_not_raise_exceptions, do_not_print_exceptions ) From eec5c5955b19a3840feb12ade4f535c116e7f5a8 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 15 Aug 2023 19:52:19 -0400 Subject: [PATCH 3/7] create db executor added --- evadb/executor/create_database_executor.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/evadb/executor/create_database_executor.py b/evadb/executor/create_database_executor.py index 754d79cc16..687959b6dc 100644 --- a/evadb/executor/create_database_executor.py +++ b/evadb/executor/create_database_executor.py @@ -12,10 +12,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import pandas as pd from evadb.database import EvaDBDatabase from evadb.executor.abstract_executor import AbstractExecutor +from evadb.models.storage.batch import Batch from evadb.parser.create_statement import CreateDatabaseStatement -from evadb.storage.storage_engine import StorageEngine from evadb.utils.logging_manager import logger @@ -24,7 +25,18 @@ def __init__(self, db: EvaDBDatabase, node: CreateDatabaseStatement): super().__init__(db, node) def exec(self, *args, **kwargs): - self.node.database_name - self.node.if_not_exists - self.node.engine - self.node.param_list + # todo handle if_not_exists + + logger.debug(f"Creating database {self.node}") + + self.catalog().create_and_insert_database_catalog_entry( + self.node.database_name, self.node.engine, self.node.param_list + ) + + yield Batch( + pd.DataFrame( + [ + f"The database {self.node.database_name} has been successfully created." + ] + ) + ) From e3ad1a0609a081cfa51453212571cfbac05e2569 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 15 Aug 2023 20:16:09 -0400 Subject: [PATCH 4/7] create db catalog services added --- evadb/catalog/catalog_manager.py | 13 +++ .../services/database_catalog_service.py | 83 +++++++++++++++++++ evadb/executor/create_database_executor.py | 2 +- 3 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 evadb/catalog/services/database_catalog_service.py diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index 8bc3e78264..dea3351668 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -121,6 +121,19 @@ def _clear_catalog_contents(self): # clean up the dataset, index, and cache directories cleanup_storage(self._config) + "Database catalog services" + + def insert_database_catalog_entry(self, name: str, engine: str, params: dict): + """A new entry is persisted in the database catalog." + + Args: + name: database name + engine: engine name + params: required params as a dictionary for the database + """ + self._database_catalog_service.insert_entry(name, engine, params) + + "Table catalog services" def insert_table_catalog_entry( diff --git a/evadb/catalog/services/database_catalog_service.py b/evadb/catalog/services/database_catalog_service.py new file mode 100644 index 0000000000..b83e9dcdc6 --- /dev/null +++ b/evadb/catalog/services/database_catalog_service.py @@ -0,0 +1,83 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from sqlalchemy.orm import Session +from sqlalchemy.sql.expression import select + +from evadb.catalog.models.database_catalog import DatabaseCatalog +from evadb.catalog.models.utils import DatabaseCatalogEntry +from evadb.catalog.services.base_service import BaseService +from evadb.utils.errors import CatalogError +from evadb.utils.logging_manager import logger + + +class DatabaseCatalogService(BaseService): + def __init__(self, db_session: Session): + super().__init__(DatabaseCatalog, db_session) + + def insert_entry( + self, + name: str, + engine: str, + params: dict, + ): + try: + db_catalog_obj = self.model( + name=name, + engine=engine, + params=params, + ) + db_catalog_obj = db_catalog_obj.save(self.session) + + except Exception as e: + logger.exception( + f"Failed to insert entry into database catalog with exception {str(e)}" + ) + raise CatalogError(e) + + def get_entry_by_name(self, database_name: str) -> DatabaseCatalogEntry: + """ + Get the table catalog entry with given table name. + Arguments: + database_name (str): Database name + Returns: + DatabaseCatalogEntry - catalog entry for given database name + """ + entry = self.session.execute( + select(self.model).filter(self.model._name == database_name) + ).scalar_one_or_none() + if entry: + return entry.as_dataclass() + return entry + + def delete_entry(self, database_entry: DatabaseCatalogEntry): + """Delete database from the catalog + Arguments: + database (DatabaseCatalogEntry): database to delete + Returns: + True if successfully removed else false + """ + try: + db_catalog_obj = self.session.execute( + select(self.model).filter(self.model._row_id == database_entry.row_id) + ).scalar_one_or_none() + db_catalog_obj.delete(self.session) + return True + except Exception as e: + err_msg = ( + f"Delete database failed for {database_entry} with error {str(e)}." + ) + logger.exception(err_msg) + raise CatalogError(err_msg) diff --git a/evadb/executor/create_database_executor.py b/evadb/executor/create_database_executor.py index 687959b6dc..56b7fc0810 100644 --- a/evadb/executor/create_database_executor.py +++ b/evadb/executor/create_database_executor.py @@ -29,7 +29,7 @@ def exec(self, *args, **kwargs): logger.debug(f"Creating database {self.node}") - self.catalog().create_and_insert_database_catalog_entry( + self.catalog().insert_database_catalog_entry( self.node.database_name, self.node.engine, self.node.param_list ) From 61ab897d0bae23647da9a317e3644be3e6b61835 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 15 Aug 2023 23:06:06 -0400 Subject: [PATCH 5/7] create database testcase added --- evadb/catalog/catalog_manager.py | 34 +++++++++++- evadb/catalog/catalog_utils.py | 13 ----- evadb/catalog/sql_config.py | 1 + evadb/executor/create_database_executor.py | 8 ++- evadb/executor/plan_executor.py | 15 +++-- evadb/parser/create_statement.py | 10 ++-- .../parser/lark_visitor/_create_statements.py | 16 +++--- evadb/parser/lark_visitor/_expressions.py | 7 ++- evadb/parser/utils.py | 2 +- evadb/server/command_handler.py | 2 +- .../test_create_database_executor.py | 55 +++++++++++++++++++ test/parser/test_parser_statements.py | 8 +-- 12 files changed, 128 insertions(+), 43 deletions(-) create mode 100644 test/integration_tests/test_create_database_executor.py diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index dea3351668..ac4c7a0e1d 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -33,6 +33,7 @@ ) from evadb.catalog.models.utils import ( ColumnCatalogEntry, + DatabaseCatalogEntry, IndexCatalogEntry, TableCatalogEntry, UdfCacheCatalogEntry, @@ -45,6 +46,7 @@ truncate_catalog_tables, ) from evadb.catalog.services.column_catalog_service import ColumnCatalogService +from evadb.catalog.services.database_catalog_service import DatabaseCatalogService from evadb.catalog.services.index_catalog_service import IndexCatalogService from evadb.catalog.services.table_catalog_service import TableCatalogService from evadb.catalog.services.udf_cache_catalog_service import UdfCacheCatalogService @@ -70,6 +72,7 @@ def __init__(self, db_uri: str, config: ConfigurationManager): self._sql_config = SQLConfig(db_uri) self._config = config self._bootstrap_catalog() + self._db_catalog_service = DatabaseCatalogService(self._sql_config.session) self._table_catalog_service = TableCatalogService(self._sql_config.session) self._column_service = ColumnCatalogService(self._sql_config.session) self._udf_service = UdfCatalogService(self._sql_config.session) @@ -131,8 +134,37 @@ def insert_database_catalog_entry(self, name: str, engine: str, params: dict): engine: engine name params: required params as a dictionary for the database """ - self._database_catalog_service.insert_entry(name, engine, params) + self._db_catalog_service.insert_entry(name, engine, params) + def get_database_catalog_entry(self, database_name: str) -> DatabaseCatalogEntry: + """ + Returns the database catalog entry for the given database_name + Arguments: + database_name (str): name of the database + + Returns: + DatabaseCatalogEntry + """ + + table_entry = self._db_catalog_service.get_entry_by_name(database_name) + + return table_entry + + def delete_database_catalog_entry( + self, database_entry: DatabaseCatalogEntry + ) -> bool: + """ + This method deletes the database from catalog. + + Arguments: + database_entry: database catalog entry to remove + + Returns: + True if successfully deleted else False + """ + # todo: do we need to remove also the associated tables etc or that will be + # taken care by the underlying db + return self._db_catalog_service.delete_entry(database_entry) "Table catalog services" diff --git a/evadb/catalog/catalog_utils.py b/evadb/catalog/catalog_utils.py index 496da35eba..d3fe528104 100644 --- a/evadb/catalog/catalog_utils.py +++ b/evadb/catalog/catalog_utils.py @@ -38,19 +38,6 @@ from evadb.parser.create_statement import ColConstraintInfo, ColumnDefinition from evadb.utils.generic_utils import get_str_hash, remove_directory_contents -CATALOG_TABLES = [ - "column_catalog", - "table_catalog", - "depend_column_and_udf_cache", - "udf_cache", - "udf_catalog", - "depend_udf_and_udf_cache", - "index_catalog", - "udfio_catalog", - "udf_cost_catalog", - "udf_metadata_catalog", -] - def is_video_table(table: TableCatalogEntry): return table.table_type == TableType.VIDEO_DATA diff --git a/evadb/catalog/sql_config.py b/evadb/catalog/sql_config.py index 0027ca5431..3024ff69e0 100644 --- a/evadb/catalog/sql_config.py +++ b/evadb/catalog/sql_config.py @@ -23,6 +23,7 @@ CATALOG_TABLES = [ "column_catalog", "table_catalog", + "database_catalog", "depend_column_and_udf_cache", "udf_cache", "udf_catalog", diff --git a/evadb/executor/create_database_executor.py b/evadb/executor/create_database_executor.py index 56b7fc0810..a0b5aec751 100644 --- a/evadb/executor/create_database_executor.py +++ b/evadb/executor/create_database_executor.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import pandas as pd + from evadb.database import EvaDBDatabase from evadb.executor.abstract_executor import AbstractExecutor from evadb.models.storage.batch import Batch @@ -27,10 +28,15 @@ def __init__(self, db: EvaDBDatabase, node: CreateDatabaseStatement): def exec(self, *args, **kwargs): # todo handle if_not_exists + logger.debug( + f"Trying to connect to the provided engine {self.node.engine} with params {self.node.param_dict}" + ) + # todo handle if the provided database params are valid + logger.debug(f"Creating database {self.node}") self.catalog().insert_database_catalog_entry( - self.node.database_name, self.node.engine, self.node.param_list + self.node.database_name, self.node.engine, self.node.param_dict ) yield Batch( diff --git a/evadb/executor/plan_executor.py b/evadb/executor/plan_executor.py index 9938bc29fb..8ab6460540 100644 --- a/evadb/executor/plan_executor.py +++ b/evadb/executor/plan_executor.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Iterator +from typing import Iterator, Union from evadb.database import EvaDBDatabase from evadb.executor.abstract_executor import AbstractExecutor @@ -48,6 +48,7 @@ from evadb.executor.vector_index_scan_executor import VectorIndexScanExecutor from evadb.models.storage.batch import Batch from evadb.parser.create_statement import CreateDatabaseStatement +from evadb.parser.statement import AbstractStatement from evadb.plan_nodes.abstract_plan import AbstractPlan from evadb.plan_nodes.types import PlanOprType from evadb.utils.logging_manager import logger @@ -67,7 +68,9 @@ def __init__(self, evadb: EvaDBDatabase, plan: AbstractPlan): self._db = evadb self._plan = plan - def _build_execution_tree(self, plan: AbstractPlan) -> AbstractExecutor: + def _build_execution_tree( + self, plan: Union[AbstractPlan, AbstractStatement] + ) -> AbstractExecutor: """build the execution tree from plan tree Arguments: @@ -80,13 +83,13 @@ def _build_execution_tree(self, plan: AbstractPlan) -> AbstractExecutor: if plan is None: return root - # Get plan node type - plan_opr_type = plan.opr_type - # First handle cases when the plan is actually a parser statement - if isinstance(plan_opr_type, CreateDatabaseStatement): + if isinstance(plan, CreateDatabaseStatement): return CreateDatabaseExecutor(db=self._db, node=plan) + # Get plan node type + plan_opr_type = plan.opr_type + if plan_opr_type == PlanOprType.SEQUENTIAL_SCAN: executor_node = SequentialScanExecutor(db=self._db, node=plan) elif plan_opr_type == PlanOprType.UNION: diff --git a/evadb/parser/create_statement.py b/evadb/parser/create_statement.py index c3285ecd42..5fcffaff32 100644 --- a/evadb/parser/create_statement.py +++ b/evadb/parser/create_statement.py @@ -187,13 +187,13 @@ def __hash__(self) -> int: class CreateDatabaseStatement(AbstractStatement): def __init__( - self, database_name: str, if_not_exists: bool, engine: str, param_list: dict + self, database_name: str, if_not_exists: bool, engine: str, param_dict: dict ): super().__init__(StatementType.CREATE_DATABASE) self.database_name = database_name self.if_not_exists = if_not_exists self.engine = engine - self.param_list = param_list + self.param_dict = param_dict def __eq__(self, other): if not isinstance(other, CreateDatabaseStatement): @@ -202,7 +202,7 @@ def __eq__(self, other): self.database_name == other.database_name and self.if_not_exists == other.if_not_exists and self.engine == other.engine - and self.param_list == other.param_list + and self.param_dict == other.param_dict ) def __hash__(self) -> int: @@ -212,7 +212,7 @@ def __hash__(self) -> int: self.database_name, self.if_not_exists, self.engine, - hash(frozenset(self.param_list.items())), + hash(frozenset(self.param_dict.items())), ) ) @@ -220,5 +220,5 @@ def __str__(self) -> str: return ( f"CREATE DATABASE {self.database_name} \n" f"WITH ENGINE '{self.engine}' , \n" - f"PARAMETERS = {self.param_list};" + f"PARAMETERS = {self.param_dict};" ) diff --git a/evadb/parser/lark_visitor/_create_statements.py b/evadb/parser/lark_visitor/_create_statements.py index ad3aa892cf..07f06ba2f8 100644 --- a/evadb/parser/lark_visitor/_create_statements.py +++ b/evadb/parser/lark_visitor/_create_statements.py @@ -285,7 +285,7 @@ def create_database(self, tree): database_name = None if_not_exists = False engine = None - param_list = [] + param_dict = {} for child in tree.children: if isinstance(child, Tree): @@ -294,21 +294,21 @@ def create_database(self, tree): elif child.data == "uid": database_name = self.visit(child) elif child.data == "create_database_engine_clause": - engine, param_list = self.visit(child) + engine, param_dict = self.visit(child) create_stmt = CreateDatabaseStatement( - database_name, if_not_exists, engine, param_list + database_name, if_not_exists, engine, param_dict ) return create_stmt def create_database_engine_clause(self, tree): engine = None - param_list = [] + param_dict = {} for child in tree.children: if isinstance(child, Tree): - if child.data == "uid": - engine = self.visit(child) + if child.data == "string_literal": + engine = self.visit(child).value elif child.data == "colon_param_dict": - param_list = self.visit(child) + param_dict = self.visit(child) - return engine, param_list + return engine, param_dict diff --git a/evadb/parser/lark_visitor/_expressions.py b/evadb/parser/lark_visitor/_expressions.py index 3f8d5880e7..c5cf5a0bfe 100644 --- a/evadb/parser/lark_visitor/_expressions.py +++ b/evadb/parser/lark_visitor/_expressions.py @@ -156,11 +156,12 @@ def chunk_params(self, tree): assert f"incorrect keyword found {chunk_params[0]}" def colon_param_dict(self, tree): - param_dict = [] + param_dict = {} for child in tree.children: if isinstance(child, Tree): if child.data == "colon_param": - key = child.value[0] - value = child.value[2] + param = self.visit(child) + key = param[0].value + value = param[1].value param_dict[key] = value return param_dict diff --git a/evadb/parser/utils.py b/evadb/parser/utils.py index 3598a6206a..aa1b1dd349 100644 --- a/evadb/parser/utils.py +++ b/evadb/parser/utils.py @@ -26,7 +26,7 @@ # List of statements for which we omit binder and optimizer and pass the statement # directly to the executor. -SKIP_BINDER_AND_OPTIMIZER_STATEMENTS = [CreateDatabaseStatement] +SKIP_BINDER_AND_OPTIMIZER_STATEMENTS = (CreateDatabaseStatement,) def parse_expression(expr: str): diff --git a/evadb/server/command_handler.py b/evadb/server/command_handler.py index d9d669b8ad..44882b746c 100644 --- a/evadb/server/command_handler.py +++ b/evadb/server/command_handler.py @@ -47,7 +47,7 @@ def execute_query( # For certain statements, we plan to omit binder and optimizer to keep the code # clean. So, we handle such cases here and pass the statement directly to the # executor. - if stmt not in SKIP_BINDER_AND_OPTIMIZER_STATEMENTS: + if not isinstance(stmt, SKIP_BINDER_AND_OPTIMIZER_STATEMENTS): StatementBinder(StatementBinderContext(evadb.catalog)).bind(stmt) l_plan = StatementToPlanConverter().visit(stmt) p_plan = plan_generator.build(l_plan) diff --git a/test/integration_tests/test_create_database_executor.py b/test/integration_tests/test_create_database_executor.py new file mode 100644 index 0000000000..29bcc3d0de --- /dev/null +++ b/test/integration_tests/test_create_database_executor.py @@ -0,0 +1,55 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +from test.util import get_evadb_for_testing, shutdown_ray + +from evadb.server.command_handler import execute_query_fetch_all + + +class CreateDatabaseTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.evadb = get_evadb_for_testing() + # reset the catalog manager before running each test + cls.evadb.catalog().reset() + + @classmethod + def tearDownClass(cls): + shutdown_ray() + + def test_create_database_should_add_the_entry(self): + params = { + "user": "user", + "password": "password", + "host": "127.0.0.1", + "port": "5432", + "database": "demo", + } + query = """CREATE DATABASE demo_db + WITH ENGINE = "postgres", + PARAMETERS = {};""".format( + params + ) + + execute_query_fetch_all(self.evadb, query) + + db_entry = self.evadb.catalog().get_database_catalog_entry("demo_db") + self.assertEqual(db_entry.name, "demo_db") + self.assertEqual(db_entry.engine, "postgres") + self.assertEqual(db_entry.params, params) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/parser/test_parser_statements.py b/test/parser/test_parser_statements.py index 513d2e6ea7..83249759b3 100644 --- a/test/parser/test_parser_statements.py +++ b/test/parser/test_parser_statements.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import unittest -from pprint import pprint from evadb.parser.parser import Parser @@ -160,9 +159,6 @@ def test_parser_statement_types(self): for other_query in queries[1:]: stmt = parser.parse(other_query)[0] - - pprint(str(stmt)) - # Check eq operator self.assertNotEqual(stmt, ref_stmt) self.assertEqual(stmt, stmt) @@ -173,3 +169,7 @@ def test_parser_statement_types(self): # Check hash operator statement_to_query_dict[stmt] = other_query + + +if __name__ == "__main__": + unittest.main() From 3fce980a48bb46c0bf5d05d3d806a0a8c794f9b3 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 15 Aug 2023 23:33:58 -0400 Subject: [PATCH 6/7] add missing file --- evadb/catalog/models/database_catalog.py | 47 ++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 evadb/catalog/models/database_catalog.py diff --git a/evadb/catalog/models/database_catalog.py b/evadb/catalog/models/database_catalog.py new file mode 100644 index 0000000000..7e99994078 --- /dev/null +++ b/evadb/catalog/models/database_catalog.py @@ -0,0 +1,47 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from sqlalchemy import Column, String + +from evadb.catalog.models.base_model import BaseModel +from evadb.catalog.models.utils import DatabaseCatalogEntry, TextPickleType + + +class DatabaseCatalog(BaseModel): + """The `DatabaseCatalog` catalog stores information about all databases. + `_row_id:` an autogenerated unique identifier. + `_name:` the database name. + `_engine:` database engine + `_param:` parameters specific to the database engine + """ + + __tablename__ = "database_catalog" + + _name = Column("name", String(100), unique=True) + _engine = Column("engine", String(100)) + _params = Column("params", TextPickleType()) + + def __init__(self, name: str, engine: str, params: dict): + self._name = name + self._engine = engine + self._params = params + + def as_dataclass(self) -> "DatabaseCatalogEntry": + return DatabaseCatalogEntry( + row_id=self._row_id, + name=self._name, + engine=self._engine, + params=self._params, + ) From 9fc6b87603f472595c948110701588a5a627bf14 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Wed, 16 Aug 2023 18:55:20 -0400 Subject: [PATCH 7/7] fix postgres --- evadb/catalog/models/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evadb/catalog/models/utils.py b/evadb/catalog/models/utils.py index a868a621fe..2b92a8b4da 100644 --- a/evadb/catalog/models/utils.py +++ b/evadb/catalog/models/utils.py @@ -38,7 +38,7 @@ class TextPickleType(TypeDecorator): https://stackoverflow.com/questions/1378325/python-dicts-in-sqlalchemy """ - impl = sqlalchemy.Text(1024) + impl = sqlalchemy.String(1024) def process_bind_param(self, value, dialect): if value is not None: