From 8d55c64f2d2f1b5dfa31f39fd84e6784682d7cc7 Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Fri, 29 Nov 2024 11:21:46 +0100 Subject: [PATCH 01/24] Add api documentation for connection and ExaConnection --- doc/api.rst | 8 ++++ pyexasol/__init__.py | 4 +- pyexasol/connection.py | 97 ++++++++++++++++++++++++++++-------------- 3 files changed, 75 insertions(+), 34 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 246b507..d269fc5 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -3,3 +3,11 @@ :octicon:`cpu` API Reference ============================= +.. autofunction:: pyexasol.connect + +.. autoclass:: pyexasol.ExaConnection + :class-doc-from: init + :members: + :undoc-members: + :show-inheritance: + diff --git a/pyexasol/__init__.py b/pyexasol/__init__.py index 29a4214..776df98 100644 --- a/pyexasol/__init__.py +++ b/pyexasol/__init__.py @@ -62,8 +62,8 @@ def connect(**kwargs) -> ExaConnection: """ - Constructor of connection objects - Please check ExaConnection object for list of arguments + Create a new connection object. For details regarding kwargs, + refer to the :class:`pyexasol.ExaConnection` class. """ return ExaConnection(**kwargs) diff --git a/pyexasol/connection.py b/pyexasol/connection.py index f5f5dd6..4d9a2bb 100644 --- a/pyexasol/connection.py +++ b/pyexasol/connection.py @@ -92,38 +92,71 @@ def __init__(self """ Exasol connection object - :param dsn: Connection string, same format as standard JDBC / ODBC drivers (e.g. 10.10.127.1..11:8564) - :param user: Username - :param password: Password - :param schema: Open schema after connection (Default: '', no schema) - :param autocommit: Enable autocommit on connection (Default: True) - :param snapshot_transactions: Explicitly enable or disable snapshot transactions on connection (Default: None, database default) - :param connection_timeout: Socket timeout in seconds used to establish connection (Default: 10) - :param socket_timeout: Socket timeout in seconds used for requests after connection was established (Default: 30) - :param query_timeout: Maximum execution time of queries before automatic abort, in seconds (Default: 0, no timeout) - :param compression: Use zlib compression both for WebSocket and HTTP transport (Default: False) - :param encryption: Use SSL to encrypt client-server communications for WebSocket and HTTP transport (Default: True) - :param fetch_dict: Fetch result rows as dicts instead of tuples (Default: False) - :param fetch_mapper: Use custom mapper function to convert Exasol values into Python objects during fetching (Default: None) - :param fetch_size_bytes: Maximum size of data message for single fetch request in bytes (Default: 5Mb) - :param lower_ident: Automatically lowercase identifiers (table names, column names, etc.) returned from relevant functions (Default: False) - :param quote_ident: Add double quotes and escape identifiers passed to relevant functions (export_*, import_*, ext.*, etc.) 
(Default: False) - :param json_lib: Supported values: rapidjson, ujson, orjson, json (Default: json) - :param verbose_error: Display additional information when error occurs (Default: True) - :param debug: Output debug information for client-server communication and connection attempts to STDERR - :param debug_logdir: Store debug information into files in debug_logdir instead of outputting it to STDERR - :param udf_output_bind_address: Specific server_address to bind TCP server for UDF script output (default: ('', 0)) - :param udf_output_connect_address: Specific SCRIPT_OUTPUT_ADDRESS value to connect from Exasol to UDF script output server (default: inherited from TCP server) - :param udf_output_dir: Directory to store captured UDF script output logs, split by _/ - :param http_proxy: HTTP proxy string in Linux http_proxy format (default: None) - :param resolve_hostnames: Explicitly resolve host names to IP addresses before connecting. Deactivating this will let the operating system resolve the host name (default: True) - :param client_name: Custom name of client application displayed in Exasol sessions tables (Default: PyEXASOL) - :param client_version: Custom version of client application (Default: pyexasol.__version__) - :param client_os_username: Custom OS username displayed in Exasol sessions table (Default: getpass.getuser()) - :param protocol_version: Major WebSocket protocol version requested for connection (Default: pyexasol.PROTOCOL_V3) - :param websocket_sslopt: Set custom SSL options for WebSocket client (Default: None) - :param access_token: OpenID access token to use for the login process - :param refresh_token: OpenID refresh token to use for the login process + Args: + dsn: + Connection string, same format as standard JDBC / ODBC drivers (e.g. 10.10.127.1..11:8564) + user: + Username + password: + Password + schema: + Open schema after connection (Default: '', no schema) + autocommit: + Enable autocommit on connection (Default: True) + snapshot_transactions: + Explicitly enable or disable snapshot transactions on connection (Default: None, database default) + connection_timeout: + Socket timeout in seconds used to establish connection (Default: 10) + socket_timeout: + Socket timeout in seconds used for requests after connection was established (Default: 30) + query_timeout: + Maximum execution time of queries before automatic abort, in seconds (Default: 0, no timeout) + compression: + Use zlib compression both for WebSocket and HTTP transport (Default: False) + encryption: + Use SSL to encrypt client-server communications for WebSocket and HTTP transport (Default: True) + fetch_dict: + Fetch result rows as dicts instead of tuples (Default: False) + fetch_mapper: + Use custom mapper function to convert Exasol values into Python objects during fetching (Default: None) + fetch_size_bytes: + Maximum size of data message for single fetch request in bytes (Default: 5Mb) + lower_ident: + Automatically lowercase identifiers (table names, column names, etc.) returned from relevant functions (Default: False) + quote_ident: + Add double quotes and escape identifiers passed to relevant functions (export_*, import_*, ext.*, etc.) 
(Default: False) + json_lib: + Supported values: rapidjson, ujson, orjson, json (Default: json) + verbose_error: + Display additional information when error occurs (Default: True) + debug: + Output debug information for client-server communication and connection attempts to STDERR + debug_logdir: + Store debug information into files in debug_logdir instead of outputting it to STDERR + udf_output_bind_address: + Specific server_address to bind TCP server for UDF script output (default: ('', 0)) + udf_output_connect_address: + Specific SCRIPT_OUTPUT_ADDRESS value to connect from Exasol to UDF script output server (default: inherited from TCP server) + udf_output_dir: + Directory to store captured UDF script output logs, split by _/ + http_proxy: + HTTP proxy string in Linux http_proxy format (default: None) + resolve_hostnames: + Explicitly resolve host names to IP addresses before connecting. Deactivating this will let the operating system resolve the host name (default: True) + client_name: + Custom name of client application displayed in Exasol sessions tables (Default: PyEXASOL) + client_version: + Custom version of client application (Default: pyexasol.__version__) + client_os_username: + Custom OS username displayed in Exasol sessions table (Default: getpass.getuser()) + protocol_version: + Major WebSocket protocol version requested for connection (Default: pyexasol.PROTOCOL_V3) + websocket_sslopt: + Set custom SSL options for WebSocket client (Default: None) + access_token: + OpenID access token to use for the login process + refresh_token: + OpenID refresh token to use for the login process """ self.options = { From f10530dcf0f1e24fc4d1b7533ca30153e775d1ef Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Fri, 29 Nov 2024 11:38:07 +0100 Subject: [PATCH 02/24] Migrate all pydoc comments to Napoleon docstring style comments --- pyexasol/connection.py | 108 +++++++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 37 deletions(-) diff --git a/pyexasol/connection.py b/pyexasol/connection.py index 4d9a2bb..e8161de 100644 --- a/pyexasol/connection.py +++ b/pyexasol/connection.py @@ -234,18 +234,24 @@ def __init__(self self.get_attr() def execute(self, query, query_params=None) -> ExaStatement: + """ Execute SQL query with optional query formatting parameters - Return ExaStatement object + + Returns: + ExaStatement object """ return self.cls_statement(self, query, query_params) def execute_udf_output(self, query, query_params=None): """ Execute SQL query with UDF script, capture output - Return ExaStatement object and list of Path-objects for script output log files - Exasol should be able to open connection to the machine where current script is running + Returns: + Return ExaStatement object and list of Path-objects for script output log files + + Attention: + Exasol should be able to open connection to the machine where current script is running """ stmt_output_dir = self._get_stmt_output_dir() @@ -432,8 +438,10 @@ def import_from_callback(self, callback, src, table, callback_params=None, impor def export_parallel(self, exa_address_list, query_or_table, query_params=None, export_params=None): """ Init HTTP transport in child processes first using pyexasol.http_transport() - Get internal Exasol address from each child process using .address - Pass address strings to parent process, combine into single list and use it for export_parallel() call + + Note: + Get internal Exasol address from each child process using .address + Pass address strings to parent process, 
combine into single list and use it for export_parallel() call """ if export_params is None: export_params = {} @@ -452,8 +460,10 @@ def export_parallel(self, exa_address_list, query_or_table, query_params=None, e def import_parallel(self, exa_address_list, table, import_params=None): """ Init HTTP transport in child processes first using pyexasol.http_transport() - Get internal Exasol address from each child process using .address - Pass address strings to parent process, combine into single list and use it for import_parallel() call + + Note: + Get internal Exasol address from each child process using .address + Pass address strings to parent process, combine into single list and use it for import_parallel() call """ if import_params is None: import_params = {} @@ -472,9 +482,12 @@ def session_id(self): def protocol_version(self): """ Return WebSocket protocol version of opened connection - Return 0 if connection was not established yet (e.g. due to exception handling) - Actual Protocol version might be downgraded from requested protocol version if Exasol server does not support it + Returns: + 0 if connection was not established yet (e.g. due to exception handling) + + Warnings: + Actual Protocol version might be downgraded from requested protocol version if Exasol server does not support it """ return int(self.login_info.get('protocolVersion', 0)) @@ -482,8 +495,9 @@ def last_statement(self) -> ExaStatement: """ Return last created ExaStatement object - It is mainly used for HTTP transport to access internal IMPORT / EXPORT query, - measure execution time and number of rows + Info: + It is mainly used for HTTP transport to access internal IMPORT / EXPORT query, + measure execution time and number of rows """ if self.last_stmt is None: raise ExaRuntimeError(self, 'Last statement not found') @@ -492,11 +506,13 @@ def last_statement(self) -> ExaStatement: def close(self, disconnect=True): """ - Close connection to Exasol by sending CLOSE websocket frame + Close connection to Exasol by sending CLOSE websocket frame. 
+ Send optional "disconnect" command to free resources and close session on Exasol server side properly - Please note that "disconnect" should always be False when .close() is being called from .req()-like functions - to prevent an infinite loop if websocket exception happens during handling of "disconnect" command + Info: + Please note that "disconnect" should always be False when .close() is being called from .req()-like functions + to prevent an infinite loop if websocket exception happens during handling of "disconnect" command """ if self._ws.connected: if disconnect: @@ -528,14 +544,17 @@ def set_attr(self, new_attr): def get_nodes(self, pool_size=None): """ - Return list of dictionaries describing active Exasol nodes - Format: {'ipaddr': , 'port': , 'idx': } + Format: ``{'ipaddr': , 'port': , 'idx': }`` + - If pool_size is bigger than number of nodes, list will wrap around and nodes will repeat with different 'idx' - If pool_size is omitted, return every active node once + Info: - It is useful to balance workload for parallel IMPORT and EXPORT - Exasol shuffles list for every connection + - If pool_size is bigger than number of nodes, list will wrap around and nodes will repeat with different 'idx' + - If pool_size is omitted, return every active node once + - It is useful to balance workload for parallel IMPORT and EXPORT Exasol shuffles list for every connection + + Returns: + list of dictionaries describing active Exasol nodes """ ret = self.req({ 'command': 'getHosts', @@ -611,13 +630,17 @@ def req(self, req): def abort_query(self): """ Abort running query - This function should be called from a separate thread and has no response - Response should be checked in the main thread which started execution of query - There are three possible outcomes of calling this function: - 1) Query is aborted normally, connection remains active - 2) Query was stuck in a state which cannot be aborted, so Exasol has to terminate connection - 3) Query might be finished successfully before abort call had a chance to take effect + Warnings: + + This function should be called from a separate thread and has no response + Response should be checked in the main thread which started execution of query + + There are three possible outcomes of calling this function: + + #. Query is aborted normally, connection remains active + #. Query was stuck in a state which cannot be aborted, so Exasol has to terminate connection + #. Query might be finished successfully before abort call had a chance to take effect """ req = { 'command': 'abortQuery' @@ -694,8 +717,10 @@ def _encrypt_password(self, public_key_pem): def _init_ws(self): """ Init websocket connection - Connection redundancy is supported - Specific Exasol node is randomly selected for every connection attempt + + Info: + - Connection redundancy is supported + - Specific Exasol node is randomly selected for every connection attempt """ dsn_items = self._process_dsn(self.options['dsn']) failed_attempts = 0 @@ -791,9 +816,13 @@ def _get_login_attributes(self): def _process_dsn(self, dsn: str) -> list[Host]: """ - Parse DSN, expand ranges and resolve IP addresses for all hostnames - Return list of (hostname, ip_address, port) tuples in random order - Randomness is required to guarantee proper distribution of workload across all nodes + Parse DSN, expand ranges and resolve IP addresses for all hostnames. 
+ + Info: + Randomness is required to guarantee proper distribution of workload across all nodes + + Returns: + List of (hostname, ip_address, port) tuples in random order """ if dsn is None or len(dsn.strip()) == 0: raise ExaConnectionDsnError(self, 'Connection string is empty') @@ -859,8 +888,10 @@ def _process_dsn(self, dsn: str) -> list[Host]: def _resolve_hostname(self, hostname: str, port: int, fingerprint: Optional[str]) -> list[Host]: """ - Resolve all IP addresses for hostname and add port - It also implicitly checks that all hostnames mentioned in DSN can be resolved + Resolve all IP addresses for hostname and add port. + + Warnings: + - It also implicitly checks that all hostnames mentioned in DSN can be resolved """ try: hostname, _, ipaddr_list = socket.gethostbyname_ex(hostname) @@ -948,11 +979,14 @@ def __exit__(self, exc_type, exc_value, traceback): def __del__(self): """ - close() is being called automatically in order to: + Will close the connection. + + Info: + close() is being called automatically in order to: - 1) send OP_CLOSE frame to Exasol server rather than silently terminating the socket on client side - 2) make sure connection is closed immediately even if garbage collection was disabled for any reasons - 3) write debug logs + #. send OP_CLOSE frame to Exasol server rather than silently terminating the socket on client side + #. make sure connection is closed immediately even if garbage collection was disabled for any reasons + #. write debug logs """ # Based on our investigations, two scenarios have emerged, one of which does not function correctly: # From a8ad894e0c6b8f20f3bcb769e141b3351a7c48e2 Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Fri, 29 Nov 2024 13:03:40 +0100 Subject: [PATCH 03/24] Add api documentation for connect_local_config function --- doc/api.rst | 2 ++ pyexasol/__init__.py | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index d269fc5..b441ce1 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -5,6 +5,8 @@ .. autofunction:: pyexasol.connect +.. autofunction:: pyexasol.connect_local_config + .. autoclass:: pyexasol.ExaConnection :class-doc-from: init :members: diff --git a/pyexasol/__init__.py b/pyexasol/__init__.py index 776df98..c96a7b4 100644 --- a/pyexasol/__init__.py +++ b/pyexasol/__init__.py @@ -70,14 +70,20 @@ def connect(**kwargs) -> ExaConnection: def connect_local_config(config_section, config_path=None, **kwargs) -> ExaConnection: """ - Constructor of connection objects based on local config file - Default config path is ~/.pyexasol.ini + Constructor for connection objects based on a local config file. - Extra arguments override values from config + Info: + - The default config path is ~/.pyexasol.ini + - Extra arguments override values from config - :param config_section: Name of config section (required!) - :param config_path: Custom path to local config file - :param kwargs: Arguments for "connect()" function + Args: + + config_section: + Name of config section (required!) 
+ config_path: + Custom path to local config file + kwargs: + Arguments for "connect()" function """ conf = ExaLocalConfig(config_path) From 835d9a8b16588ad07d27a60577627de980e4c9aa Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Fri, 29 Nov 2024 13:12:47 +0100 Subject: [PATCH 04/24] Add api documentation for http_transport function --- doc/api.rst | 2 ++ pyexasol/__init__.py | 58 ++++++++++++++++++++++++++------------------ 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index b441ce1..4fa5dda 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -7,6 +7,8 @@ .. autofunction:: pyexasol.connect_local_config +.. autofunction:: pyexasol.http_transport + .. autoclass:: pyexasol.ExaConnection :class-doc-from: init :members: diff --git a/pyexasol/__init__.py b/pyexasol/__init__.py index c96a7b4..da3cfb7 100644 --- a/pyexasol/__init__.py +++ b/pyexasol/__init__.py @@ -94,28 +94,40 @@ def connect_local_config(config_section, config_path=None, **kwargs) -> ExaConne def http_transport(ipaddr, port, compression=False, encryption=True) -> ExaHTTPTransportWrapper: """ - Constructor of HTTP Transport wrapper for parallel HTTP Transport (EXPORT or IMPORT) - Compression and encryption arguments should match pyexasol.connect() - - How to use: - 1) Parent process opens main connection to Exasol with pyexasol.connect() - 2) - 2) Parent process creates any number of child processes (possibly on remote host or another container) - 3) Every child process starts HTTP transport sub-connection with pyexasol.http_transport() - and gets "ipaddr:port" string using ExaHTTPTransportWrapper.address - 4) Every child process sends address string to parent process using any communication method (Pipe, Queue, Redis, etc.) - 5) Parent process runs .export_parallel() or .import_parallel(), which initiates EXPORT or IMPORT query in Exasol - 6) Every child process receives or sends a chunk of data using ExaHTTPTransportWrapper.export_*() or .import_*() - 7) Parent process waits for Exasol query and for child processes to finish - - All child processes should run in parallel. - It is NOT possible to process some data first, and process some more data later. - - If an exception is raised in child process, it will close the pipe used for HTTP transport. - Closing the pipe prematurely will cause SQL query to fail and will raise an exception in parent process. - Parent process is responsible for closing other child processes and cleaning up. - - PyEXASOL does not provide a complete solution to manage child processes, only examples. - The final solution depends on your hardware, network configuration, cloud provider and container orchestration software. + Constructor for HTTP Transport wrapper for parallel HTTP Transport (EXPORT or IMPORT) + + Args: + ipaddr: + IP address of one of Exasol nodes received from :meth:`pyexasol.ExaConnection.get_nodes` + port: + Port of one of Exasol nodes received from :meth:`pyexasol.ExaConnection.get_nodes` + compression: + Use zlib compression for HTTP transport, must be the same as `compression` of main connection + encryption: + Use SSL encryption for HTTP transport, must be the same as `encryption` of main connection + + Info: + Compression and encryption arguments should match :func:`pyexasol.connect` + + How to use: + + #. Parent process opens main connection to Exasol with pyexasol.connect() + #. Parent process creates any number of child processes (possibly on remote host or another container) + #. 
Every child process starts HTTP transport sub-connection with pyexasol.http_transport() + #. and gets "ipaddr:port" string using ExaHTTPTransportWrapper.address + #. Every child process sends address string to parent process using any communication method (Pipe, Queue, Redis, etc.) + #. Parent process runs .export_parallel() or .import_parallel(), which initiates EXPORT or IMPORT query in Exasol + #. Every child process receives or sends a chunk of data using ExaHTTPTransportWrapper.export_*() or .import_*() + #. Parent process waits for Exasol query and for child processes to finish + + All child processes should run in parallel. + It is NOT possible to process some data first, and process some more data later. + + If an exception is raised in child process, it will close the pipe used for HTTP transport. + Closing the pipe prematurely will cause SQL query to fail and will raise an exception in parent process. + Parent process is responsible for closing other child processes and cleaning up. + + PyEXASOL does not provide a complete solution to manage child processes, only examples. + The final solution depends on your hardware, network configuration, cloud provider and container orchestration software. """ return ExaHTTPTransportWrapper(ipaddr, port, compression, encryption) From 340b1ca1a8745d2a33326f263529ab88e0b2c09a Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Fri, 29 Nov 2024 13:35:28 +0100 Subject: [PATCH 05/24] Add all relevant classes and functions to api docs --- doc/api.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 4fa5dda..80c2a01 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -15,3 +15,32 @@ :undoc-members: :show-inheritance: +.. autoclass:: pyexasol.ExaStatement + :class-doc-from: init + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: pyexasol.ExaFormatter + :class-doc-from: init + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: pyexasol.ExaMetaData + :class-doc-from: init + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: pyexasol.ExaExtension + :class-doc-from: init + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: pyexasol.ExaHTTPTransportWrapper + :class-doc-from: init + :members: + :undoc-members: + :show-inheritance: From 29605a99c8246b66583e374efdd2816db8c6067b Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Fri, 29 Nov 2024 14:00:23 +0100 Subject: [PATCH 06/24] Migrate documentation of ExaConnection to doc strings --- doc/api.rst | 3 +- doc/user_guide/protocol_version.rst | 2 + pyexasol/connection.py | 439 ++++++++++++++++++++++++---- 3 files changed, 382 insertions(+), 62 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 80c2a01..9ba9806 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -10,8 +10,7 @@ .. autofunction:: pyexasol.http_transport .. autoclass:: pyexasol.ExaConnection - :class-doc-from: init - :members: + :members: __init__ :undoc-members: :show-inheritance: diff --git a/doc/user_guide/protocol_version.rst b/doc/user_guide/protocol_version.rst index 100a361..763b1a5 100644 --- a/doc/user_guide/protocol_version.rst +++ b/doc/user_guide/protocol_version.rst @@ -1,3 +1,5 @@ +.. 
_protocol_version: + WebSocket protocol versions =========================== diff --git a/pyexasol/connection.py b/pyexasol/connection.py index e8161de..18c1d73 100644 --- a/pyexasol/connection.py +++ b/pyexasol/connection.py @@ -38,21 +38,38 @@ class Host(NamedTuple): port: int fingerprint: Optional[str] + class ExaConnection(object): + """ + Warning: + Threads may share the module, but not connections + One connection may be used by different threads, just not at the same time + :meth:`pyexasol.ExaConnection.abort_query` is an exception, + it is meant to be called from another thread + + Note: + + It is advisable to use multiprocessing instead of threading and create + new connection in each sub-process + + Public Attributes: + ``attr``: + Read-only `dict` of attributes of current connection. + + ``login_info``: + Read-only ``dict`` of login information returned by second + response of LOGIN command. + + ``options``: + Read-only ``dict`` of arguments passed to + :meth:`pyexasol.ExaConnection.connect`. + """ cls_statement = ExaStatement cls_formatter = ExaFormatter cls_logger = ExaLogger cls_extension = ExaExtension cls_meta = ExaMetaData - """ - Threads may share the module, but not connections - One connection may be used by different threads, just not at the same time - - .abort_query() is an exception, it is meant to be called from another thread - - It is advisable to use multiprocessing instead of threading and create new connection in each sub-process - """ threadsafety = 1 def __init__(self @@ -93,66 +110,98 @@ def __init__(self Exasol connection object Args: - dsn: - Connection string, same format as standard JDBC / ODBC drivers (e.g. 10.10.127.1..11:8564) - user: + dsn: + Connection string, same format as standard JDBC / ODBC drivers + (e.g. 
10.10.127.1..11:8564) + user: Username password: Password schema: - Open schema after connection (Default: '', no schema) + Open schema after connection + (Default: '', no schema) autocommit: - Enable autocommit on connection (Default: True) + Enable autocommit on connection + (Default: True) snapshot_transactions: - Explicitly enable or disable snapshot transactions on connection (Default: None, database default) + Explicitly enable or disable snapshot transactions on connection + (Default: None, database default) connection_timeout: - Socket timeout in seconds used to establish connection (Default: 10) + Socket timeout in seconds used to establish connection + (Default: 10) socket_timeout: - Socket timeout in seconds used for requests after connection was established (Default: 30) + Socket timeout in seconds used for requests after connection was established + (Default: 30) query_timeout: - Maximum execution time of queries before automatic abort, in seconds (Default: 0, no timeout) + Maximum execution time of queries before automatic abort, in seconds + (Default: 0, no timeout) compression: - Use zlib compression both for WebSocket and HTTP transport (Default: False) + Use zlib compression both for WebSocket and HTTP transport + (Default: False) encryption: - Use SSL to encrypt client-server communications for WebSocket and HTTP transport (Default: True) + Use SSL to encrypt client-server communications for WebSocket and HTTP transport + (Default: True) fetch_dict: Fetch result rows as dicts instead of tuples (Default: False) fetch_mapper: - Use custom mapper function to convert Exasol values into Python objects during fetching (Default: None) + Use custom mapper function to convert Exasol values into + Python objects during fetching + (Default: None) fetch_size_bytes: - Maximum size of data message for single fetch request in bytes (Default: 5Mb) + Maximum size of data message for single fetch request in bytes + (Default: 5Mb) lower_ident: - Automatically lowercase identifiers (table names, column names, etc.) returned from relevant functions (Default: False) + Automatically lowercase identifiers (table names, column names, etc.) + returned from relevant functions + (Default: False) quote_ident: - Add double quotes and escape identifiers passed to relevant functions (export_*, import_*, ext.*, etc.) (Default: False) + Add double quotes and escape identifiers passed to relevant functions + (export_*, import_*, ext.*, etc.) 
+ (Default: False) json_lib: - Supported values: rapidjson, ujson, orjson, json (Default: json) + Supported values: rapidjson, ujson, orjson, json + (Default: json) verbose_error: - Display additional information when error occurs (Default: True) + Display additional information when error occurs + (Default: True) debug: - Output debug information for client-server communication and connection attempts to STDERR + Output debug information for client-server communication and + connection attempts to STDERR debug_logdir: - Store debug information into files in debug_logdir instead of outputting it to STDERR + Store debug information into files in debug_logdir instead of + outputting it to STDERR udf_output_bind_address: - Specific server_address to bind TCP server for UDF script output (default: ('', 0)) + Specific server_address to bind TCP server for UDF script output + (default: ('', 0)) udf_output_connect_address: - Specific SCRIPT_OUTPUT_ADDRESS value to connect from Exasol to UDF script output server (default: inherited from TCP server) + Specific SCRIPT_OUTPUT_ADDRESS value to connect from Exasol to + UDF script output server + (default: inherited from TCP server) udf_output_dir: - Directory to store captured UDF script output logs, split by _/ + Directory to store captured UDF script output logs, split by + _/ http_proxy: - HTTP proxy string in Linux http_proxy format (default: None) + HTTP proxy string in Linux http_proxy format + (default: None) resolve_hostnames: - Explicitly resolve host names to IP addresses before connecting. Deactivating this will let the operating system resolve the host name (default: True) + Explicitly resolve host names to IP addresses before connecting. + Deactivating this will let the operating system resolve the host name + (default: True) client_name: - Custom name of client application displayed in Exasol sessions tables (Default: PyEXASOL) + Custom name of client application displayed in Exasol sessions tables + (Default: PyEXASOL) client_version: - Custom version of client application (Default: pyexasol.__version__) + Custom version of client application + (Default: pyexasol.__version__) client_os_username: - Custom OS username displayed in Exasol sessions table (Default: getpass.getuser()) + Custom OS username displayed in Exasol sessions table + (Default: getpass.getuser()) protocol_version: - Major WebSocket protocol version requested for connection (Default: pyexasol.PROTOCOL_V3) + Major WebSocket protocol version requested for connection + (Default: pyexasol.PROTOCOL_V3) websocket_sslopt: - Set custom SSL options for WebSocket client (Default: None) + Set custom SSL options for WebSocket client + (Default: None) access_token: OpenID access token to use for the login process refresh_token: @@ -234,12 +283,25 @@ def __init__(self self.get_attr() def execute(self, query, query_params=None) -> ExaStatement: - """ Execute SQL query with optional query formatting parameters + Args: + query: + SQL query text, possibly with placeholders + query_params: + Values for placeholders + Returns: ExaStatement object + + Examples: + + >>> con = ExaConnection(...) + >>> con.execute( + ... query="SELECT * FROM {table!i} WHERE col1={col1}", + ... query_params={'table': 'users', 'col1':'bar'} + ...) 
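+
+            The returned :class:`pyexasol.ExaStatement` can be iterated to fetch
+            result rows. A minimal sketch; the table and column names below are
+            illustrative only:
+
+            >>> stmt = con.execute("SELECT user_id, user_name FROM users")
+            >>> for row in stmt:
+            ...     print(row)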
""" return self.cls_statement(self, query, query_params) @@ -247,11 +309,30 @@ def execute_udf_output(self, query, query_params=None): """ Execute SQL query with UDF script, capture output + Note: + Exasol should be able to open connection to the machine where current script is running. + It is usually OK in the same data centre, but it is normally not working + if you try to run this function on local laptop. + + Args: + query: + SQL query text, possibly with placeholders + query_params: + Values for placeholders | + Returns: - Return ExaStatement object and list of Path-objects for script output log files + Return tuple with two elements: (1) instance of :class:`pyexasol.ExaStatement` + and (2) list of :class:`Path` objects for script output log files. Attention: Exasol should be able to open connection to the machine where current script is running + + Examples: + >>> con = ExaConnection(...) + >>> con.execute( + ... query="SELECT * FROM {table!i} WHERE col1={col1}", + ... query_params={'table': 'users', 'col1':'bar'} + ...) """ stmt_output_dir = self._get_stmt_output_dir() @@ -291,12 +372,27 @@ def execute_udf_output(self, query, query_params=None): return stmt, log_files def commit(self): + """Wrapper for query 'COMMIT'""" return self.execute('COMMIT') def rollback(self): + """Wrapper for query 'ROLLBACK'""" return self.execute('ROLLBACK') def set_autocommit(self, val): + """ + Set autocommit mode. + + Args: + val: + Set ``False`` to execute following statements in transaction. + Set ``True`` to get back to automatic COMMIT after each statement. + + Note: + Autocommit is ``True`` by default because Exasol has to commit indexes and statistics + objects even for pure SELECT statements. Lack of default COMMIT may lead to serious + performance degradation. + """ if not isinstance(val, bool): raise ValueError("Autocommit value must be boolean") @@ -305,25 +401,119 @@ def set_autocommit(self, val): }) def set_query_timeout(self, val): + """ + Set the maximum time in seconds for which a query can run before Exasol kills it automatically. + + Args: + val: + Timeout value in seconds. + Set value ``0`` to disable timeout. + + Note: + It is highly recommended to set timeout for UDF scripts to + avoid potential infinite loops and very long transactions. + """ self.set_attr({ 'queryTimeout': int(val) }) def open_schema(self, schema): + """ + Wrapper for `OPEN SCHEMA` + + Args: + schema: Schema name + """ self.set_attr({ 'currentSchema': self.format.default_format_ident_value(schema) }) def current_schema(self): + """ + Get the name of the current schema. + + Returns: + Name of currently opened schema. Return empty string if no schema was opened. + """ return self.attr.get('currentSchema', '') def export_to_file(self, dst, query_or_table, query_params=None, export_params=None): + """ + Export large amount of data from Exasol to file or file-like object using fast HTTP transport. + + Note: + File must be opened in binary mode. + + Args: + dst: + Path to file or file-like object. + query_or_table: + SQL query or table for export. + query_params: + Values for SQL query placeholders. + export_params: + Custom parameters for Export query. + + Examples: + >>> con = ExaConnection(...) + >>> with open('/tmp/file.csv', 'wb') as f: + ... con.export_to_file( + ... dst=f, + ... query_or_table="SELECT * FROM table" + ... 
) + """ return self.export_to_callback(cb.export_to_file, dst, query_or_table, query_params, None, export_params) def export_to_list(self, query_or_table, query_params=None, export_params=None): + """ + Export large amount of data from Exasol to basic Python `list` using fast HTTP transport. + + Args: + query_or_table: + SQL query or table for export. + query_params: + Values for SQL query placeholders. + export_params: + Custom parameters for Export query. + + Returns: + `list` of `tuples` + + Warnings: + - This function may run out of memory + + Examples: + >>> con = ExaConnection(...) + >>> con.export_to_list( + ... query_or_table="SELECT * FROM table" + ... ) + """ return self.export_to_callback(cb.export_to_list, None, query_or_table, query_params, None, export_params) def export_to_pandas(self, query_or_table, query_params=None, callback_params=None, export_params=None): + """ + Export large amount of data from Exasol to :class:`pandas.DataFrame`. + + Args: + query_or_table: + SQL query or table for export. + query_params: + Values for SQL query placeholders. + export_params: + Custom parameters for Export query. + + Returns: + instance of :class:`pandas.DataFrame` + + Warnings: + - This function may run out of memory + + Examples: + >>> con = ExaConnection(...) + >>> con.export_to_pandas( + ... query_or_table="SELECT * FROM table" + ... ) + """ if not export_params: export_params = {} @@ -332,15 +522,79 @@ def export_to_pandas(self, query_or_table, query_params=None, callback_params=No return self.export_to_callback(cb.export_to_pandas, None, query_or_table, query_params, callback_params, export_params) def import_from_file(self, src, table, import_params=None): + """ + Import a large amount of data from a file or file-like object. + + Args: + src: + Source file or file-like object. + table: + Destination table for IMPORT. + import_params: + Custom parameters for import query. + + Note: + File must be opened in binary mode. + """ return self.import_from_callback(cb.import_from_file, src, table, None, import_params) def import_from_iterable(self, src, table, import_params=None): + """ + Import a large amount of data from an ``iterable`` Python object. + + Args: + src: + Source object implementing ``__iter__``. + Iterator must return tuples of values. + table: + Destination table for IMPORT. + import_params: + Custom parameters for import query. + """ return self.import_from_callback(cb.import_from_iterable, src, table, None, import_params) def import_from_pandas(self, src, table, callback_params=None, import_params=None): + """ + Import a large amount of data from ``pandas.DataFrame``. + + Args: + src: + Source ``pandas.DataFrame`` instance. + table: + Destination table for IMPORT. + import_params: + Custom parameters for import query. + """ return self.import_from_callback(cb.import_from_pandas, src, table, callback_params, import_params) def export_to_callback(self, callback, dst, query_or_table, query_params=None, callback_params=None, export_params=None): + """ + Export large amount of data to user-defined callback function + + Args: + callback: + Callback function + query_or_table: + SQL query or table for export. + query_params: + Values for SQL query placeholders. + export_params: + Custom parameters for Export query. + + Returns: + result of callback function + + Warnings: + - This function may run out of memory + + Examples: + >>> cb = lamda args: print(args) + >>> con = ExaConnection(...) + >>> con.export_to_callback( + ... callback=cb, + ... 
query_or_table="SELECT * FROM table" + ... ) + """ if not callable(callback): raise ValueError('Callback argument is not callable') @@ -390,6 +644,24 @@ def export_to_callback(self, callback, dst, query_or_table, query_params=None, c raise e def import_from_callback(self, callback, src, table, callback_params=None, import_params=None): + """ + Import a large amount of data from a user-defined callback function. + + Args: + callback: + Callback function. + src: + Source for the callback function. + table: + Destination table for IMPORT. + callback_params: + Dict with additional parameters for callback function + import_params: + Custom parameters for IMPORT query. + + Raises: + ValueError: callback argument isn't callable. + """ if callback_params is None: callback_params = {} @@ -437,11 +709,21 @@ def import_from_callback(self, callback, src, table, callback_params=None, impor def export_parallel(self, exa_address_list, query_or_table, query_params=None, export_params=None): """ - Init HTTP transport in child processes first using pyexasol.http_transport() + This function is part of :ref:`http_transport_parallel` API. + Args: + exa_address_list: + List of ``ipaddr:port`` strings obtained from HTTP transport ``.address``. + query_or_table: + SQL query or table for the export. + query_params: + Values for SQL query placeholders. + export_params: + Custom parameters for Export query. Note: - Get internal Exasol address from each child process using .address - Pass address strings to parent process, combine into single list and use it for export_parallel() call + - Init HTTP transport in child processes first using pyexasol.http_transport() + - Get internal Exasol address from each child process using .address + - Pass address strings to parent process, combine into single list and use it for export_parallel() call """ if export_params is None: export_params = {} @@ -459,11 +741,21 @@ def export_parallel(self, exa_address_list, query_or_table, query_params=None, e def import_parallel(self, exa_address_list, table, import_params=None): """ - Init HTTP transport in child processes first using pyexasol.http_transport() + This function is part of :ref:`http_transport_parallel` API. + + Args: + exa_address_list: + List of ``ipaddr:port`` strings obtained from HTTP transport ``.address``. + table: + Table to import to. + import_params: + Custom parameters for import. Note: - Get internal Exasol address from each child process using .address - Pass address strings to parent process, combine into single list and use it for import_parallel() call + - Init HTTP transport in child processes first using pyexasol.http_transport() + - Get internal Exasol address from each child process using .address + - Pass address strings to parent process, combine into single list and use it for import_parallel() call + """ if import_params is None: import_params = {} @@ -477,27 +769,46 @@ def import_parallel(self, exa_address_list, table, import_params=None): sql_thread.run_sql() def session_id(self): + """ + Session id of current session. + + Returns: + Unique `SESSION_ID` of the current session. + """ return str(self.login_info.get('sessionId', '')) def protocol_version(self): """ - Return WebSocket protocol version of opened connection + Actual protocol version used by the the established connection. Returns: - 0 if connection was not established yet (e.g. due to exception handling) + ``0`` if connection was not established yet (e.g. 
due to exception handling) Warnings: Actual Protocol version might be downgraded from requested protocol version if Exasol server does not support it + + Note: + The actual protocol version may be lower than the requested protocol version + defined by the ``protocol_version`` connection option. For further details, + refer to :ref:`protocol_version`. + """ return int(self.login_info.get('protocolVersion', 0)) def last_statement(self) -> ExaStatement: """ - Return last created ExaStatement object + Last created statement object - Info: + Returns: + ExaStatement: last created statement. + + Note: It is mainly used for HTTP transport to access internal IMPORT / EXPORT query, measure execution time and number of rows + + Tip: + It is useful while working with `export_*` and `import_*` functions normally + returning result of callback function instead of statement object. """ if self.last_stmt is None: raise ExaRuntimeError(self, 'Last statement not found') @@ -506,11 +817,13 @@ def last_statement(self) -> ExaStatement: def close(self, disconnect=True): """ - Close connection to Exasol by sending CLOSE websocket frame. + Closes connection to database. - Send optional "disconnect" command to free resources and close session on Exasol server side properly + Args: + disconnect: + If ``true`` send optional "disconnect" command to free resources and close session on Exasol server side properly. - Info: + Note: Please note that "disconnect" should always be False when .close() is being called from .req()-like functions to prevent an infinite loop if websocket exception happens during handling of "disconnect" command """ @@ -544,17 +857,23 @@ def set_attr(self, new_attr): def get_nodes(self, pool_size=None): """ - Format: ``{'ipaddr': , 'port': , 'idx': }`` + List of currently active Exasol nodes which is normally used for :ref:`http_transport_parallel`. + + Args: + pool_size: + Return list of specific size. + Returns: + list of dictionaries describing active Exasol nodes - Info: + Note: + + Format: ``{'ipaddr': , 'port': , 'idx': }`` - If pool_size is bigger than number of nodes, list will wrap around and nodes will repeat with different 'idx' - If pool_size is omitted, return every active node once - It is useful to balance workload for parallel IMPORT and EXPORT Exasol shuffles list for every connection - - Returns: - list of dictionaries describing active Exasol nodes + - Exasol shuffles list for every connection. """ ret = self.req({ 'command': 'getHosts', @@ -718,7 +1037,7 @@ def _init_ws(self): """ Init websocket connection - Info: + Note: - Connection redundancy is supported - Specific Exasol node is randomly selected for every connection attempt """ @@ -818,7 +1137,7 @@ def _process_dsn(self, dsn: str) -> list[Host]: """ Parse DSN, expand ranges and resolve IP addresses for all hostnames. - Info: + Note: Randomness is required to guarantee proper distribution of workload across all nodes Returns: @@ -981,7 +1300,7 @@ def __del__(self): """ Will close the connection. - Info: + Note: close() is being called automatically in order to: #. 
send OP_CLOSE frame to Exasol server rather than silently terminating the socket on client side From 8b2e842fe652ade755b275711862b5d9a9d9bba0 Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Tue, 3 Dec 2024 13:31:55 +0100 Subject: [PATCH 07/24] Migrate documentation of ExaStatement to doc strings --- doc/api.rst | 5 +- pyexasol/statement.py | 152 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+), 2 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 9ba9806..e250fee 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -10,13 +10,14 @@ .. autofunction:: pyexasol.http_transport .. autoclass:: pyexasol.ExaConnection - :members: __init__ + :members: + :special-members: __init__ :undoc-members: :show-inheritance: .. autoclass:: pyexasol.ExaStatement - :class-doc-from: init :members: + :special-members: __init__, __iter__ :undoc-members: :show-inheritance: diff --git a/pyexasol/statement.py b/pyexasol/statement.py index 47f4122..9b80177 100644 --- a/pyexasol/statement.py +++ b/pyexasol/statement.py @@ -6,7 +6,45 @@ class ExaStatement(object): + """ + This class executes and helps to fetch result set of single Exasol SQL statement. + + Warning: + Unlike typical `Cursor` object, `ExaStatement` is not reusable. + + Note: + :class:`pyexasol.ExaStatement` may fetch result set rows as ``tuples`` (default) + or as ``dict`` (set `fetch_dict=True` in connection options). + + :class:`pyexasol.ExaStatement` may use custom data-type mapper during fetching + (set `fetch_mapper=` in connection options). + Mapper function accepts two arguments (raw `value` and `dataType` object) + and returns custom object or value. + + :class:`pyexasol.ExaStatement` fetches big result sets in chunks. + The size of chunk may be adjusted (set `fetch_size_bytes=` in connection options). + + Public Attributes: + ``execution_time``: + Execution time of SQL statement. It is measured by wall-clock time + of WebSocket request, so real execution time is a bit faster. + """ def __init__(self, connection, query=None, query_params=None, prepare=False, meta_nosql=False, **options): + """ + Args: + connection: + - + query: + - + query_params: + - + prepare: + - + meta_nosql: + - + options: + additonal kwargs + """ self.connection = connection self.query = query if meta_nosql else self._format_query(query, query_params) @@ -55,6 +93,18 @@ def __init__(self, connection, query=None, query_params=None, prepare=False, met self._execute() def __iter__(self): + """ + The best way to fetch result set of statement is to use iterator: + + Yields: + ``tuple`` or ``dict`` depending on ``fetch_dict`` connection option. + + Examples: + + >>> st = pyexasol.execute('SELECT * FROM table') + ... for row in st: + ... print(row) + """ return self def __next__(self): @@ -82,6 +132,13 @@ def __next__(self): return row def fetchone(self): + """ + Fetches one row of data. + + Returns: + ``tuple`` or ``dict``. + ``None`` if all rows were fetched. + """ try: row = next(self) except StopIteration: @@ -90,16 +147,55 @@ def fetchone(self): return row def fetchmany(self, size=constant.DEFAULT_FETCHMANY_SIZE): + """ + Fetch multiple rows. + + Args: + size: + Set the specific number of rows to fetch (Default: ``10000``) + + Returns: + ``list`` of ``tuples`` or ``list`` of ``dict``. + Empty `list` if all rows were fetched previously. + """ return [row for row in itertools.islice(self, size)] def fetchall(self): + """ + Fetches all remaining rows. + + Returns: + ``list`` of ``tuples`` or ``list`` of ``dict``. 
+ Empty ``list`` if all rows were fetched previously. + + Warning: + This function may exhaust available memory. + """ return [row for row in self] def fetchcol(self): + """ + Fetches all values from the first column. + + Returns: + ``list`` of values. + Empty ``list`` if all rows were fetched previously. + """ self.fetch_dict = False return [row[0] for row in self] def fetchval(self): + """ + Fetches first column of first row. + + + Returns: + Value, ``None`` if all rows were fetched previously. + + Tip: + This may be useful for queries returning single value like + ``SELECT count(*) FROM table``. + """ self.fetch_dict = False try: @@ -110,18 +206,74 @@ def fetchval(self): return row[0] def rowcount(self): + """ + Number of selected/processed rows. + + Returns: + Total amount of selected rows for statements with result set (``num_rows``). + Total amount of processed rows for DML queries (``row_count``). + """ if self.result_type == 'resultSet': return self.num_rows_total else: return self.row_count def columns(self): + """ + Retrieves column information of returned data. + + Returns: + A ``dict`` with keys as ``column names`` and values as ``dataType`` objects. + + Notes: + + The dict will containt the following data: + + .. list-table:: + :header-rows: 1 + + * - Names + - Type + - Description + * - type + - string + - column data type + * - precision + - number + - (optional) column precision + * - scale + - number + - (optional) column scale + * - size + - number + - (optional) maximum size in bytes of a column value + * - characterSet + - string + - (optional) character encoding of a text column + * - withLocalTimeZone + - true, false + - (optional) specifies if a timestamp has a local time zone + * - fraction + - number + - (optional) fractional part of number + * - srid + - number + - (optional) spatial reference system identifier + """ return dict(zip(self.col_names, self.col_types)) def column_names(self): + """List of column names.""" return self.col_names def close(self): + """ + Closes result set handle if it was opened. + + Warning: + You won't be able to fetch next chunk of large dataset + after calling this function, but no other side-effects. + """ self._close_result_set_handle() self._close_statement_handle() From ba4b0578e1c3f70f53880413f1f088666b5ea718 Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Tue, 3 Dec 2024 14:01:14 +0100 Subject: [PATCH 08/24] Migrate documentation of ExaFormatter to doc strings --- pyexasol/formatter.py | 99 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/pyexasol/formatter.py b/pyexasol/formatter.py index 306939d..4ccca3f 100644 --- a/pyexasol/formatter.py +++ b/pyexasol/formatter.py @@ -3,6 +3,22 @@ class ExaFormatter(string.Formatter): + """ + :class:`pyexasol.ExaFormatter` is a subclass of :class:`string.Formatter` designed to prevent SQL injections in Exasol dynamic SQL queries. + + Note: + It introduces set of placeholders to prevent SQL injections specifically + in Exasol dynamic SQL queries. It also completely disabled `format_spec` + section of standard formatting since it has no use in context of + SQL queries and may cause more harm than good. + +You may access these functions using `.format` property of connection object. Example: + + Examples: + + >>> C = pyexasol.connect(...) + ... 
print(C.format.escape('abc')) + """ safe_ident_regexp = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$') safe_decimal_regexp = re.compile(r'^(\+|-)?[0-9]+(\.[0-9]+)?$') safe_float_regexp = re.compile(r'^(\+|-)?[0-9]+(\.[0-9]+((e|E)(\+|-)[0-9]+)?)?$') @@ -51,18 +67,52 @@ def convert_field(self, value, conversion): @classmethod def escape(cls, val): + """ + Takes a raw value and converts it into an and escaped string. + + Args: + val: Value to be escaped. + + Returns: + A string where all single quotes ``'`` have been replaced + with two single quotes ``''``. + """ return str(val).replace("'", "''") @classmethod def escape_ident(cls, val): + """ + Takes a raw value and converts it into an and escaped string. + + Args: + val: Value to be escaped. + + Returns: + A string where all double quotes ``"`` have been replaced + with two double quotes ``""``. + """ return str(val).replace('"', '""') @classmethod def escape_like(cls, val): + """ + Escape LIKE-patterns. + + Args: + val: Value to be escaped. + + Returns: + A string where all double quotes ``\\`` have been replaced + with ``\\\\``, where ``%`` have been replaced with ``\%``, + where ``_`` have been replaced with ``\_``. + """ return cls.escape(val).replace('\\', '\\\\').replace('%', r'\%').replace('_', r'\_') @classmethod def quote(cls, val): + """ + Escapes a string using :meth:`pyexasol.ExaFormatter.escape` and wraps it in single quotes ``'``. + """ if val is None: return 'NULL' @@ -70,6 +120,16 @@ def quote(cls, val): @classmethod def quote_ident(cls, val): + """ + Escapes a string one or multiple values using :meth:`pyexasol.ExaFormatter.excape_ident` and wraps it in double quotes ``"``. + + Args: + val (str or tuple): Raw identifier(s) to be escaped. + + Returns: + str: The formatted and quoted identifier, or joined identifiers if + a tuple was provided. + """ if isinstance(val, tuple): return '.'.join([cls.quote_ident(x) for x in val]) @@ -77,6 +137,21 @@ def quote_ident(cls, val): @classmethod def safe_ident(cls, val): + """ + Convert a raw indientifer safely. + + Args: + val (str or tuple): Raw identifier(s). + + Returns: + Validates identifier as string. + + Raises: + ValueError If passed values is not a valid identifier (e.g. contains spaces) + + Warning: + It puts it into SQL query without any quotting. + """ if isinstance(val, tuple): return '.'.join([cls.safe_ident(x) for x in val]) @@ -97,6 +172,18 @@ def safe_ident(cls, val): @classmethod def safe_float(cls, val): + """ + Convert a float safely to string. + + Args: + val: Float value to convert. + + Returns: + Validates identifier as string. + + Raises: + ValueError: If value is not valid, e.g.: ``+infinity`` or ``-infinity``. + """ if val is None: return 'NULL' @@ -109,6 +196,18 @@ def safe_float(cls, val): @classmethod def safe_decimal(cls, val): + """ + Convert a decimal safely to string. + + Args: + val: Decimal value to convert. + + Returns: + Validates identifier as string. + + Raises: + ValueError: If value is not valid. 
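+
+        Examples:
+            A minimal sketch; the value is illustrative, and access through the
+            ``.format`` property of a connection object is assumed:
+
+            >>> C = pyexasol.connect(...)
+            >>> C.format.safe_decimal('123.45')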
+ """ if val is None: return 'NULL' From c306506dc2addc82a8199ef6500cc7389c8e59f2 Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Tue, 3 Dec 2024 16:06:18 +0100 Subject: [PATCH 09/24] Migrate documentation of ExaMetaData to doc strings --- pyexasol/meta.py | 231 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 223 insertions(+), 8 deletions(-) diff --git a/pyexasol/meta.py b/pyexasol/meta.py index ad2d7c9..dda6a4d 100644 --- a/pyexasol/meta.py +++ b/pyexasol/meta.py @@ -4,12 +4,17 @@ class ExaMetaData(object): """ - This class implements lock-free meta data requests using `/*snapshot execution*/` SQL hint described in IDEA-476 - https://www.exasol.com/support/browse/IDEA-476 + This class implements lock-free meta data requests using ``/*snapshot execution*/`` SQL hint described in `IDEA-476 `_. - If you still get locks, please make sure to update Exasol server to the latest minor version + Note: + If you still get locks, please make sure to update Exasol server to the latest minor version - This class also implements no SQL metadata commands introduced in Exasol v7.0 via .execute_meta_nosql() function + Examples: + + You may access these functions using `.meta` property of connection object. + + >>> C = pyexasol.connect(...) + ... print(C.meta.sql_columns('SELECT 1 AS id')) """ snapshot_execution_hint = '/*snapshot execution*/' @@ -20,6 +25,16 @@ def __init__(self, connection): def sql_columns(self, query, query_params=None): """ Get result set columns of SQL query without executing it + + Args: + query: + SQL query text, possibly with placholders. + query_params: + Values for placholders. + + Returns: + Columns of SQL query result without executing it. + Output format is similar to :meth:`pyexasol.ExaStatement.columns`. """ st = self.connection.cls_statement(self.connection, query, query_params, prepare=True) columns = st.columns() @@ -28,6 +43,16 @@ def sql_columns(self, query, query_params=None): return columns def schema_exists(self, schema_name): + """ + Check if schema exists. + + Args: + schema_name: + Name of the schema to check. + + Returns: + ``True`` if the schema exists, otherwise ``False``. + """ object_name = self.connection.format.default_format_ident_value(schema_name) if self.connection.protocol_version() >= constant.PROTOCOL_V2: @@ -46,6 +71,17 @@ def schema_exists(self, schema_name): return st.rowcount() > 0 def table_exists(self, table_name): + """ + Check if table exists. + + Args: + table_name: + Name of the table to check for. + If schema was not specified, ``current_schema`` is used. + + Returns: + ``True`` if the table exists, otherwise ``False``. + """ if isinstance(table_name, tuple): object_schema = self.connection.format.default_format_ident_value(table_name[0]) object_name = self.connection.format.default_format_ident_value(table_name[1]) @@ -73,6 +109,17 @@ def table_exists(self, table_name): return st.rowcount() > 0 def view_exists(self, view_name): + """ + Check if view exists. + + Args: + view_name: + Name of the table to check for. + If schema was not specified, ``current_schema`` is used. + + Returns: + ``True`` if the view exists, otherwise ``False``. + """ if isinstance(view_name, tuple): object_schema = self.connection.format.default_format_ident_value(view_name[0]) object_name = self.connection.format.default_format_ident_value(view_name[1]) @@ -100,6 +147,20 @@ def view_exists(self, view_name): return st.rowcount() > 0 def list_schemas(self, schema_name_pattern='%'): + """ + List Schemas. 
+ + Args: + schema_name_pattern: + Schema name or LIKE-pattern to filter on. + (default: ``'%'``) + + Returns: + List of schemas from `EXA_SCHEMAS `_ system view matching LIKE-pattern. + + Note: + Patterns are case-sensitive. You may escape LIKE-patterns. + """ st = self.execute_snapshot(""" SELECT * FROM sys.exa_schemas @@ -112,6 +173,23 @@ def list_schemas(self, schema_name_pattern='%'): return st.fetchall() def list_tables(self, table_schema_pattern='%', table_name_pattern='%'): + """ + List Tables. + + Args: + table_schema_pattern: + Schema name or LIKE-pattern to filter on. + (default: ``'%'``) + table_name_pattern: + Table name or LIKE-pattern to filter on. + (default: ``'%'``) + + Returns: + List of tables from `EXA_ALL_TABLES `_ system view matching LIKE-pattern. + + Note: + Patterns are case-sensitive. You may escape LIKE-patterns. + """ st = self.execute_snapshot(""" SELECT * FROM sys.exa_all_tables @@ -126,6 +204,23 @@ def list_tables(self, table_schema_pattern='%', table_name_pattern='%'): return st.fetchall() def list_views(self, view_schema_pattern='%', view_name_pattern='%'): + """ + List Views. + + Args: + view_schema_pattern: + Schema name or LIKE-pattern to filter on. + (default: ``'%'``) + view_name_pattern: + Table name or LIKE-pattern to filter on. + (default: ``'%'``) + + Returns: + List of views from `EXA_ALL_VIEWS `_ system view matching LIKE-pattern. + + Note: + Patterns are case-sensitive. You may escape LIKE-patterns. + """ st = self.execute_snapshot(""" SELECT * FROM sys.exa_all_views @@ -141,6 +236,29 @@ def list_views(self, view_schema_pattern='%', view_name_pattern='%'): def list_columns(self, column_schema_pattern='%', column_table_pattern='%' , column_object_type_pattern='%', column_name_pattern='%'): + """ + List Columns. + + Args: + column_schema_pattern: + Schema name or LIKE-pattern to filter on. + (default: ``'%'``) + column_table_pattern: + Table name or LIKE-pattern to filter on. + (default: ``'%'``) + column_object_type_pattern: + Object type or LIKE-pattern to filter on. + (default: ``'%'``) + column_name_pattern: + Column name or LIKE-pattern to filter on. + (default: ``'%'``) + + Returns: + List of columns from `EXA_ALL_COLUMNS `_ system view matching LIKE-pattern. + + Note: + Patterns are case-sensitive. You may escape LIKE-patterns. + """ st = self.execute_snapshot(""" SELECT * FROM sys.exa_all_columns @@ -158,6 +276,29 @@ def list_columns(self, column_schema_pattern='%', column_table_pattern='%' return st.fetchall() def list_objects(self, object_name_pattern='%', object_type_pattern='%', owner_pattern='%', root_name_pattern='%'): + """ + List Objects. + + Args: + object_name_pattern: + Object name or LIKE-pattern to filter on. + (default: ``'%'``) + object_type_pattern: + Object type or LIKE-pattern to filter on. + (default: ``'%'``) + owner_pattern: + Owner name or LIKE-pattern to filter on. + (default: ``'%'``) + root_name_pattern: + Root name or LIKE-pattern to filter on.j + It normally refers to schema name. + (default: ``'%'``) + + Returns: + List of objects from `EXA_ALL_OBJECTS `_ system view matching LIKE-pattern. + Note: + Patterns are case-sensitive. You may escape LIKE-patterns. 
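+
+        Examples:
+            Illustrative only (``C`` is assumed to be an open connection;
+            the filter value is a placeholder):
+
+            >>> C.meta.list_objects(object_type_pattern='TABLE')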
+ """ st = self.execute_snapshot(""" SELECT * FROM sys.exa_all_objects @@ -175,6 +316,29 @@ def list_objects(self, object_name_pattern='%', object_type_pattern='%', owner_p return st.fetchall() def list_object_sizes(self, object_name_pattern='%', object_type_pattern='%', owner_pattern='%', root_name_pattern='%'): + """ + List Objects with their respective size. + + Args: + object_name_pattern: + Object name or LIKE-pattern to filter on. + (default: ``'%'``) + object_type_pattern: + Object type or LIKE-pattern to filter on. + (default: ``'%'``) + owner_pattern: + Owner name or LIKE-pattern to filter on. + (default: ``'%'``) + root_name_pattern: + Root name or LIKE-pattern to filter on.j + It normally refers to schema name. + (default: ``'%'``) + + Returns: + List of objects with sizes from `EXA_ALL_OBJECT_SIZES `_ system view matching LIKE-pattern. + Note: + Patterns are case-sensitive. You may escape LIKE-patterns. + """ st = self.execute_snapshot(""" SELECT * FROM sys.exa_all_object_sizes @@ -192,6 +356,26 @@ def list_object_sizes(self, object_name_pattern='%', object_type_pattern='%', ow return st.fetchall() def list_indices(self, index_schema_pattern='%', index_table_pattern='%', index_owner_pattern='%'): + """ + List indicies. + + Args: + index_schema_pattern: + Schema name or LIKE-pattern to filter on. + (default: ``'%'``) + index_table_pattern: + Table name or LIKE-pattern to filter on. + (default: ``'%'``) + index_owner_pattern: + Owner name or LIKE-pattern to filter on. + (default: ``'%'``) + + Returns: + List of indices with sizes from `EXA_ALL_INDICES `_ system view matching LIKE-pattern. + + Note: + Patterns are case-sensitive. You may escape LIKE-patterns. + """ st = self.execute_snapshot(""" SELECT * FROM sys.exa_all_indices @@ -209,7 +393,14 @@ def list_indices(self, index_schema_pattern='%', index_table_pattern='%', index_ def list_sql_keywords(self): """ Get reserved SQL keywords which cannot be used as identifiers without double-quote escaping - Never hardcode this list! It might change with next Exasol server version without warning + + Returns: + List of SQL keywords from `EXA_SQL_KEYWORDS `_ system view. + + Warning: + Never hardcode this list! It might change with next Exasol server version without warning + Note: + These keywords cannot be used as identifiers without double quotes. """ if not self.sql_keywords: if self.connection.protocol_version() >= constant.PROTOCOL_V2: @@ -231,7 +422,20 @@ def list_sql_keywords(self): def execute_snapshot(self, query, query_params=None): """ Execute query in snapshot transaction mode using SQL hint - fetch_dict=True is enforced to prevent users from relying on order of columns in system views + + Args: + query: + SQL query text, possibly with placeholders. + query_params: + Values for placeholders. + Returns: + :class:`ExaStatement` + + Warning: + Please do not try to query normal tables with this method. It will fail during creation of indices or statistics objects. 
+
+        Note:
+            ``fetch_dict=True`` is enforced to prevent users from relying on order of columns in system views
         """
         options = {
             'fetch_dict': True,
@@ -242,9 +446,20 @@ def execute_snapshot(self, query, query_params=None):
 
     def execute_meta_nosql(self, meta_command, meta_params=None):
         """
         Execute no SQL meta data command introduced in Exasol 7.0+
-        This feature requires WebSocket protocol v2 or higher
 
-        List of available commands: https://github.com/exasol/websocket-api/blob/master/docs/WebsocketAPIV2.md#metadata-related-commands
+        Args:
+            meta_command:
+                Metadata command.
+            meta_params:
+                Parameters for metadata command.
+
+        Returns:
+            :class:`ExaStatement`
+
+        Note:
+            This feature requires WebSocket protocol v2 or higher
+
+            List of available commands can be found `here <https://github.com/exasol/websocket-api/blob/master/docs/WebsocketAPIV2.md#metadata-related-commands>`_.
         """
         if self.connection.protocol_version() < constant.PROTOCOL_V2:
             raise ExaRuntimeError(self.connection, 'Protocol version 2 is required to execute nosql meta data commands')

From 23dd3beb526c6c4ef3dcf013a03708f04c1cd441 Mon Sep 17 00:00:00 2001
From: Nicola Coretti
Date: Wed, 4 Dec 2024 09:55:15 +0100
Subject: [PATCH 10/24] Migrate documentation of ExaExtension to doc strings

---
 pyexasol/ext.py | 196 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 151 insertions(+), 45 deletions(-)

diff --git a/pyexasol/ext.py b/pyexasol/ext.py
index e117ad9..00d524a 100644
--- a/pyexasol/ext.py
+++ b/pyexasol/ext.py
@@ -6,26 +6,46 @@
 
 
 class ExaExtension(object):
+    """
+    This class extends the functionality of a simple SQL driver to address common Exasol-related problems.
+
+    Tip:
+        You may access these functions using `.ext` property of connection object.
+
+    Examples:
+        >>> C = pyexasol.connect(...)
+        ... print(C.ext.get_disk_space_usage())
+    """
+
     def __init__(self, connection):
         self.connection = connection
         self.reserved_words = None
 
     def get_columns(self, object_name):
         """
-        DEPRECATED, please use `.meta.sql_columns` instead
+        Get information about columns of table or view.
+
+        Args:
+            object_name: Object name may be passed as tuple to specify custom schema.
 
-        Get information about columns of table or view (Websocket format)
-        Object name may be passed as tuple to specify custom schema
+        Caution:
+            **DEPRECATED**, please use ``.meta.sql_columns`` instead.
         """
         object_name = self.connection.format.default_format_ident(object_name)
         return self.get_columns_sql(f"SELECT * FROM {object_name}")
 
     def get_columns_sql(self, query, query_params=None):
         """
-        DEPRECATED, please use `.meta.sql_columns` instead
+        Get columns of SQL query without executing it.
 
-        Get columns of SQL query without executing it (Websocket format)
-        It relies on prepared statement which is closed immediately without execution
+        Args:
+            query: SQL query text; ``query_params`` provides optional values for placeholders.
+
+        Caution:
+            **DEPRECATED**, please use ``.meta.sql_columns`` instead.
+
+        Note:
+            It relies on prepared statement which is closed immediately without execution
         """
         stmt = self.connection.cls_statement(self.connection, query, query_params, prepare=True)
         columns = stmt.columns()
@@ -35,14 +55,27 @@ def get_columns_sql(self, query, query_params=None):
 
     def insert_multi(self, table_name, data, columns=None):
         """
-        INSERT small number of rows into table using prepared statement
-        It provides better performance for small data sets of 10,000 rows or less compared to .import_from_iterable()
+        Insert a small number of rows into a table using a prepared statement.
+
+        Args:
+            table_name:
+                Target table for INSERT.
+ data: + Source object implementing ``__iter__`` (e.g. list or tuple). + columns: + List of column names to specify custom order of columns. + + Tip: + Compared to ``.import_from_iterable``, this method offers better performance for small data sets of 10,000 rows or fewer. - Please use .import_from_iterable() for larger data sets and better memory efficiency - Please use .import_from_pandas() to import from data frame regardless of its size + * Use ``.import_from_iterable`` for larger data sets and better memory efficiency + * Use ``.import_from_pandas`` to import from data frame regardless of its size - You may use "columns" argument to specify custom order of columns for insertion - If some columns are not included in this list, NULL or DEFAULT value will be used instead + You may use "columns" argument to specify custom order of columns for insertion + If some columns are not included in this list, ``NULL`` or ``DEFAULT`` value will be used instead + + Note: + Please note that data should be presented in a row format. You may use ``zip(*data_cols)`` to convert columnar format into row format. """ # Convert possible iterator into list @@ -70,10 +103,13 @@ def insert_multi(self, table_name, data, columns=None): def get_sys_columns(self, object_name): """ - DEPRECATED, please use `.meta.list_columns` instead - Get information about columns of table or view (SYS format) - Object name may be passed as tuple to specify custom schema + + Args: + object_name: Object name may be passed as tuple to specify custom schema. + + Caution: + **DEPRECATED**, please use ``.meta.list_columns`` instead. """ if isinstance(object_name, tuple): schema = self.connection.format.default_format_ident_value(object_name[0]) @@ -113,10 +149,16 @@ def get_sys_columns(self, object_name): def get_sys_tables(self, schema=None, table_name_prefix=''): """ - DEPRECATED, please use `.meta.list_tables` instead - Get information about tables in selected schema(SYS format) - Output may be optionally filtered by table name prefix + + Args: + schema: + - + table_name_prefix: + Output may be optionally filtered by table name prefix. + + Caution: + **DEPRECATED**, please use ``.meta.list_tables`` instead. """ if schema is None: schema = self.connection.current_schema() @@ -150,10 +192,16 @@ def get_sys_tables(self, schema=None, table_name_prefix=''): def get_sys_views(self, schema=None, view_name_prefix=''): """ - DEPRECATED, please use `.meta.list_views` instead - Get information about views in selected schema(SYS format) - Output may be optionally filtered by view name prefix + + Args: + schema: + - + view_name_prefix: + Output may be optionally filtered by view name prefix. + + Caution: + **DEPRECATED**, please use ``.meta.list_views`` instead. """ if schema is None: schema = self.connection.current_schema() @@ -187,10 +235,14 @@ def get_sys_views(self, schema=None, view_name_prefix=''): def get_sys_schemas(self, schema_name_prefix=''): """ - DEPRECATED, please use `.meta.list_schemas` instead - Get information about schemas (SYS format) - Output may be optionally filtered by schema name prefix + + Args: + schema_name_prefix: + Output may be optionally filtered by schema name prefix + + Caution: + **DEPRECATED**, please use ``.meta.list_schemas`` instead. 
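+
+        Examples:
+            Illustrative sketch of the preferred replacement (``C`` is assumed
+            to be an open connection; the prefix is a placeholder):
+
+            >>> C.meta.list_schemas(schema_name_pattern='MY_SCHEMA%')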
""" schema_name_prefix = self.connection.format.default_format_ident_value(schema_name_prefix) schema_name_prefix = self.connection.format.escape_like(schema_name_prefix) @@ -217,10 +269,13 @@ def get_sys_schemas(self, schema_name_prefix=''): def get_reserved_words(self): """ - DEPRECATED, please use `.meta.list_sql_keywords` instead + Get reserved keywords which cannot be used as identifiers without double-quotes. + + Caution: + **DEPRECATED**, please use ``.meta.list_sql_keywords`` instead. - Get reserved keywords which cannot be used as identifiers without double-quotes - Never hard-code this list! It changes with every Exasol versions + Warning: + Never hard-code this list! It changes with every Exasol versions. """ if self.reserved_words is None: sql = """ @@ -236,8 +291,29 @@ def get_reserved_words(self): def get_disk_space_usage(self): """ - Exasol still lacks standard function to measure actual disk space usage - We're trying to mitigate this problem by making custom function + Get the disk space usage of the exasol DB. + + Returns: + + A dict with 4 keys, providing all disk space details. + + .. list-table:: + :header-rows: 1 + + * - Key + - Description + * - ``occupied_size`` + - How much space is occupied (in bytes) + * - ``free_size`` + - How much space is available (in bytes) + * - ``total_size`` + - occupied_size + free_size + * - ``occupied_size_percent`` + - Percentage of occupied disk space (0-100%) + + Note: + Exasol still lacks a standard function to measure actual disk space usage. + We are trying to mitigate this problem by creating a custom function. """ sql = """ SELECT measure_time, @@ -263,15 +339,34 @@ def get_disk_space_usage(self): def export_to_pandas_with_dtype(self, query_or_table, query_params=None): """ - Export to pandas and attempt to guess correct dtypes based on Exasol columns - Since pandas has significantly smaller range of allowed values, this function makes many assumptions - Please use it as baseline for your own function for complex cases - - Small decimal -> int32 - Big decimal -> int64 - Double -> float64 - Date, Timestamp -> datetime64[ns] - Everything else -> category (!) + Export to pandas and attempt to guess correct dtypes based on Exasol columns. + + Args: + query_or_table: + Query or table to export. + query_params: + Additional query parameters. + + Note: + Since pandas has significantly smaller range of allowed values, this function makes many assumptions + Please use it as baseline for your own function for complex cases + + .. list-table:: + :widths: 25 25 + :header-rows: 1 + + * - Exasol Type + - Pandas Type + * - Small decimal + - int32 + * - Big decimal + - int64 + * - Double + - float64 + * - Date, Timestamp + - datetime64[ns] + * - Everything else + - category (!) """ if query_params: @@ -328,16 +423,27 @@ def callback(pipe, dst, **kwargs): def explain_last(self, details=False): """ - Returns profiling information for last executed query - This function should be called immediately after execute() + Args: + details (bool): + - ``False``, the function returns the average (AVG) or maximum (MAX) values aggregated for all Exasol nodes. + - ``True``, the function returns separate rows for each individual Exasol node, with a column labeled "iproc" representing the node. + + Returns: + Profiling information for last executed query. + + Note: + This function should be called immediately after ``execute()`` + ``COMMIT``, ``ROLLBACK`` and ``FLUSH STATISTICS`` queries are ignored. 
+ Tip: + Details are useful to detect bad data distribution and imbalanced execution + If you want to see real values of ``CPU, MEM, HDD, NET`` columns, + please enable Exasol profiling first with: - details=False returns AVG or MAX values for all Exasol nodes - details=True returns separate rows for each individual Exasol node (column "iproc") + .. code-block:: sql - Details are useful to detect bad data distribution and imbalanced execution + ALTER SESSION SET PROFILE = 'ON'; - If you want to see real values of CPU, MEM, HDD, NET columns, please enable Exasol profiling first with: - ALTER SESSION SET PROFILE = 'ON'; + *Please refer to Exasol User Manuals for explanations about profiling columns.* """ self._execute('FLUSH STATISTICS') From aa1fbc8894564120ee5251f0708bda3f8388b8ea Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Wed, 4 Dec 2024 10:35:52 +0100 Subject: [PATCH 11/24] Migrate documentation of ExaHTTPTransportWrapper to doc strings --- pyexasol/http_transport.py | 63 +++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/pyexasol/http_transport.py b/pyexasol/http_transport.py index 7108593..80892fc 100644 --- a/pyexasol/http_transport.py +++ b/pyexasol/http_transport.py @@ -259,11 +259,17 @@ def terminate(self): class ExaHTTPTransportWrapper(object): """ - Start HTTP server, obtain address ("ipaddr:port" string) - Send it to parent process + Wrapper for :ref:`http_transport_parallel`. - Block into "export_*()" or "import_*()" call, - wait for incoming connection, process data and exit. + You may create this wrapper using :func:`pyexasol.http_transport`. + + Note: + + Starts an HTTP server, obtains the address (the ``"ipaddr:port"`` string), + and sends it to the parent process. + + Block into ``export_*()`` or ``import_*()`` call, + wait for incoming connection, process data and exit. """ def __init__(self, ipaddr, port, compression=False, encryption=True): self.http_thread = ExaHttpThread(ipaddr, port, compression, encryption) @@ -271,13 +277,42 @@ def __init__(self, ipaddr, port, compression=False, encryption=True): @property def exa_address(self): + """ + Internal Exasol address as ``ipaddr:port`` string. + + Note: + This string should be passed from child processes to parent process + and used as an argument for ``export_parallel()`` and + ``import_parallel()`` functions. + """ return self.http_thread.exa_address def get_proxy(self): - """ DEPRECATED, please use .exa_address property """ + """ + Caution: + **DEPRECATED**, please use ``.exa_address`` property + """ return self.http_thread.exa_address def export_to_callback(self, callback, dst, callback_params=None): + """ + Exports chunk of data using callback function. + + Args: + callback: + Callback function. + dst: + Export destination for callback function. + callback_params: + Dict with additional parameters for callback function. + + Returns: + Result of the callback function. + + Note: + You may use exactly the same callbacks utilized by standard + non-parallel ``export_to_callback()`` function. + """ if not callable(callback): raise ValueError('Callback argument is not callable') @@ -299,6 +334,24 @@ def export_to_callback(self, callback, dst, callback_params=None): raise e def import_from_callback(self, callback, src, callback_params=None): + """ + Import chunk of data using callback function. + + Args: + callback: + Callback function. + src: + Import source for the callback function. + callback_params: + Dict with additional parameters for the callback function. 
+ + Returns: + Result of callback function + + Note: + You may use exactly the same callbacks utilized by standard + non-parallel ``import_from_callback()`` function. + """ if not callable(callback): raise ValueError('Callback argument is not callable') From 8b6e27e889acfcc84475f38e30b51d709bdc22b3 Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Wed, 4 Dec 2024 10:47:33 +0100 Subject: [PATCH 12/24] Update changelog --- doc/changes/unreleased.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/changes/unreleased.md b/doc/changes/unreleased.md index a1ecf84..28f09ad 100644 --- a/doc/changes/unreleased.md +++ b/doc/changes/unreleased.md @@ -1,2 +1,6 @@ # Unreleased +## 📚 Documentation + +* Add sphinx based documention + From 89a24ddda3c59efa7c083a3de120295c7af9b20a Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Wed, 4 Dec 2024 13:21:12 +0100 Subject: [PATCH 13/24] Update pyexasol/__init__.py Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> --- pyexasol/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyexasol/__init__.py b/pyexasol/__init__.py index da3cfb7..38d487d 100644 --- a/pyexasol/__init__.py +++ b/pyexasol/__init__.py @@ -104,7 +104,7 @@ def http_transport(ipaddr, port, compression=False, encryption=True) -> ExaHTTPT compression: Use zlib compression for HTTP transport, must be the same as `compression` of main connection encryption: - Use SSL encryption for HTTP transport, must be the same as `encryption` of main connection + Use SSL/TLS encryption for HTTP transport, must be the same as `encryption` of main connection Info: Compression and encryption arguments should match :func:`pyexasol.connect` From 945320cc716365f4522e7eef391b4b730a847a42 Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Wed, 4 Dec 2024 13:21:39 +0100 Subject: [PATCH 14/24] Update pyexasol/connection.py Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> --- pyexasol/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyexasol/connection.py b/pyexasol/connection.py index 18c1d73..e6f2e68 100644 --- a/pyexasol/connection.py +++ b/pyexasol/connection.py @@ -50,7 +50,7 @@ class ExaConnection(object): Note: It is advisable to use multiprocessing instead of threading and create - new connection in each sub-process + a new connection in each sub-process Public Attributes: ``attr``: From 95729bc1f9d7765d9ee9a9667fdc792f30edebea Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Wed, 4 Dec 2024 14:02:53 +0100 Subject: [PATCH 15/24] Update pyexasol/connection.py Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> --- pyexasol/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyexasol/connection.py b/pyexasol/connection.py index e6f2e68..db5e966 100644 --- a/pyexasol/connection.py +++ b/pyexasol/connection.py @@ -773,7 +773,7 @@ def session_id(self): Session id of current session. Returns: - Unique `SESSION_ID` of the current session. + Unique `SESSION_ID` of the current session as string. 
""" return str(self.login_info.get('sessionId', '')) From b0d6ef5ac32f32c9d049c29ee33d2ec6101a522d Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Wed, 4 Dec 2024 14:03:26 +0100 Subject: [PATCH 16/24] Update pyexasol/connection.py Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> --- pyexasol/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyexasol/connection.py b/pyexasol/connection.py index db5e966..cde29cf 100644 --- a/pyexasol/connection.py +++ b/pyexasol/connection.py @@ -433,7 +433,7 @@ def current_schema(self): Get the name of the current schema. Returns: - Name of currently opened schema. Return empty string if no schema was opened. + Name of currently opened schema. Returns an empty string if no schema was opened. """ return self.attr.get('currentSchema', '') From 53fb8a64c3192071fab5af767b405aed3a17f47f Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Wed, 4 Dec 2024 14:04:52 +0100 Subject: [PATCH 17/24] Update pyexasol/connection.py Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> --- pyexasol/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyexasol/connection.py b/pyexasol/connection.py index cde29cf..d8f2005 100644 --- a/pyexasol/connection.py +++ b/pyexasol/connection.py @@ -484,7 +484,7 @@ def export_to_list(self, query_or_table, query_params=None, export_params=None): Examples: >>> con = ExaConnection(...) - >>> con.export_to_list( + >>> myresult = con.export_to_list( ... query_or_table="SELECT * FROM table" ... ) """ From 2bd18e9ca00d61e855eae16f7b778cc3010a0e64 Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Wed, 4 Dec 2024 14:05:50 +0100 Subject: [PATCH 18/24] Update pyexasol/connection.py Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> --- pyexasol/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyexasol/connection.py b/pyexasol/connection.py index d8f2005..82e50c8 100644 --- a/pyexasol/connection.py +++ b/pyexasol/connection.py @@ -510,7 +510,7 @@ def export_to_pandas(self, query_or_table, query_params=None, callback_params=No Examples: >>> con = ExaConnection(...) - >>> con.export_to_pandas( + >>> myresult = con.export_to_pandas( ... query_or_table="SELECT * FROM table" ... ) """ From c2f1f2348193b9dfe20269219c75a4958f9ad4c3 Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Wed, 4 Dec 2024 14:06:10 +0100 Subject: [PATCH 19/24] Update pyexasol/connection.py Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> --- pyexasol/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyexasol/connection.py b/pyexasol/connection.py index 82e50c8..5d96f07 100644 --- a/pyexasol/connection.py +++ b/pyexasol/connection.py @@ -782,7 +782,7 @@ def protocol_version(self): Actual protocol version used by the the established connection. Returns: - ``0`` if connection was not established yet (e.g. due to exception handling) + ``0`` if connection was not established yet (e.g. due to exception handling), otherwise protocol version as int. 
Warnings: Actual Protocol version might be downgraded from requested protocol version if Exasol server does not support it From 753fe05c1fd5b2ec9305a7977551f40ff87e3680 Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Wed, 4 Dec 2024 14:06:29 +0100 Subject: [PATCH 20/24] Update pyexasol/formatter.py Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> --- pyexasol/formatter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyexasol/formatter.py b/pyexasol/formatter.py index 4ccca3f..b6a41cf 100644 --- a/pyexasol/formatter.py +++ b/pyexasol/formatter.py @@ -8,7 +8,7 @@ class ExaFormatter(string.Formatter): Note: It introduces set of placeholders to prevent SQL injections specifically - in Exasol dynamic SQL queries. It also completely disabled `format_spec` + in Exasol dynamic SQL queries. It also completely disables `format_spec` section of standard formatting since it has no use in context of SQL queries and may cause more harm than good. From 63342d52db84650a58d0caabc50527e70dfaa8a1 Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Wed, 4 Dec 2024 14:18:02 +0100 Subject: [PATCH 21/24] Update pyexasol/formatter.py Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> --- pyexasol/formatter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyexasol/formatter.py b/pyexasol/formatter.py index b6a41cf..fe6af73 100644 --- a/pyexasol/formatter.py +++ b/pyexasol/formatter.py @@ -138,7 +138,7 @@ def quote_ident(cls, val): @classmethod def safe_ident(cls, val): """ - Convert a raw indientifer safely. + Convert a raw identifier safely. Args: val (str or tuple): Raw identifier(s). From fb55fa2e680e0165beec45d1f6fc8698bc8cf0af Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Thu, 5 Dec 2024 08:16:26 +0100 Subject: [PATCH 22/24] Update pyexasol/formatter.py Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> --- pyexasol/formatter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyexasol/formatter.py b/pyexasol/formatter.py index fe6af73..71c94d0 100644 --- a/pyexasol/formatter.py +++ b/pyexasol/formatter.py @@ -121,7 +121,7 @@ def quote(cls, val): @classmethod def quote_ident(cls, val): """ - Escapes a string one or multiple values using :meth:`pyexasol.ExaFormatter.excape_ident` and wraps it in double quotes ``"``. + Escapes an object or a tuple of objects using :meth:`pyexasol.ExaFormatter.escape_ident` and wraps it in double quotes ``"``. Args: val (str or tuple): Raw identifier(s) to be escaped. From cdc45fd89f7450f54d0e641bcb09bfd41998ef8c Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Thu, 5 Dec 2024 08:19:24 +0100 Subject: [PATCH 23/24] Update pyexasol/formatter.py Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> --- pyexasol/formatter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyexasol/formatter.py b/pyexasol/formatter.py index 71c94d0..8f194ab 100644 --- a/pyexasol/formatter.py +++ b/pyexasol/formatter.py @@ -173,7 +173,7 @@ def safe_ident(cls, val): @classmethod def safe_float(cls, val): """ - Convert a float safely to string. + Convert a float safely to string. Args: val: Float value to convert. 
From dc6e068ba5a8718af259388e4473842b33491d4c Mon Sep 17 00:00:00 2001 From: Nicola Coretti Date: Thu, 5 Dec 2024 08:16:57 +0100 Subject: [PATCH 24/24] Address code review feedback --- pyexasol/connection.py | 2 +- pyexasol/formatter.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyexasol/connection.py b/pyexasol/connection.py index 5d96f07..ffbae6f 100644 --- a/pyexasol/connection.py +++ b/pyexasol/connection.py @@ -329,7 +329,7 @@ def execute_udf_output(self, query, query_params=None): Examples: >>> con = ExaConnection(...) - >>> con.execute( + >>> stmt, output_files = con.execute_udf_output( ... query="SELECT * FROM {table!i} WHERE col1={col1}", ... query_params={'table': 'users', 'col1':'bar'} ...) diff --git a/pyexasol/formatter.py b/pyexasol/formatter.py index 8f194ab..bd9a000 100644 --- a/pyexasol/formatter.py +++ b/pyexasol/formatter.py @@ -124,7 +124,7 @@ def quote_ident(cls, val): Escapes an object or a tuple of objects using :meth:`pyexasol.ExaFormatter.escape_ident` and wraps it in double quotes ``"``. Args: - val (str or tuple): Raw identifier(s) to be escaped. + val: Raw identifier(s) to be escaped. Returns: str: The formatted and quoted identifier, or joined identifiers if