diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 42acb50ed..e27709682 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -41,7 +41,7 @@ # Nullable types are problematic "clickhouse", } -MOTHERDUCK_TOKEN_REGEX = re.compile(r"(\?motherduck_token=)(\S*)") +MOTHERDUCK_TOKEN_REGEX = re.compile(r"(\?|\&)(motherduck_token=)(\S*)") class ConnectionConfig(abc.ABC, BaseConfig): @@ -134,7 +134,6 @@ class DuckDBAttachOptions(BaseConfig): type: str path: str read_only: bool = False - token: t.Optional[str] = None def to_sql(self, alias: str) -> str: options = [] @@ -144,14 +143,18 @@ def to_sql(self, alias: str) -> str: options.append(f"TYPE {self.type.upper()}") if self.read_only: options.append("READ_ONLY") + options_sql = f" ({', '.join(options)})" if options else "" + alias_sql = "" # TODO: Add support for Postgres schema. Currently adding it blocks access to the information_schema - alias_sql = ( + if self.type == "motherduck": # MotherDuck does not support aliasing - f" AS {alias}" if not (self.type == "motherduck" or self.path.startswith("md:")) else "" - ) - options_sql = f" ({', '.join(options)})" if options else "" - token_sql = "?motherduck_token=" + self.token if self.token else "" - return f"ATTACH '{self.path}{token_sql}'{alias_sql}{options_sql}" + if (md_db := self.path.replace("md:", "")) != alias.replace('"', ""): + raise ConfigError( + f"MotherDuck does not support assigning an alias different from the database name {md_db}." + ) + else: + alias_sql += f" AS {alias}" + return f"ATTACH '{self.path}'{alias_sql}{options_sql}" class BaseDuckDBConnectionConfig(ConnectionConfig): @@ -293,16 +296,20 @@ def init(cursor: duckdb.DuckDBPyConnection) -> None: query = f"ATTACH '{path_options}'" if not path_options.startswith("md:"): query += f" AS {alias}" - elif self.token: - query += f"?motherduck_token={self.token}" cursor.execute(query) except BinderException as e: # If a user tries to create a catalog pointing at `:memory:` and with the name `memory` # then we don't want to raise since this happens by default. They are just doing this to # set it as the default catalog. - if not ( - 'database with name "memory" already exists' in str(e) - and path_options == ":memory:" + # If a user tried to attach a MotherDuck database/share which has already by attached via + # `ATTACH 'md:'`, then we don't want to raise since this is expected. + if ( + not ( + 'database with name "memory" already exists' in str(e) + and path_options == ":memory:" + ) + and f"""database with name "{path_options.path.replace('md:', '')}" already exists""" + not in str(e) ): raise e if i == 0 and not getattr(self, "database", None): @@ -355,7 +362,9 @@ def get_catalog(self) -> t.Optional[str]: return None def _mask_motherduck_token(self, string: str) -> str: - return MOTHERDUCK_TOKEN_REGEX.sub(lambda m: f"{m.group(1)}{'*' * len(m.group(2))}", string) + return MOTHERDUCK_TOKEN_REGEX.sub( + lambda m: f"{m.group(1)}{m.group(2)}{'*' * len(m.group(3))}", string + ) class MotherDuckConnectionConfig(BaseDuckDBConnectionConfig): @@ -373,11 +382,12 @@ def _static_connection_kwargs(self) -> t.Dict[str, t.Any]: from sqlmesh import __version__ custom_user_agent_config = {"custom_user_agent": f"SQLMesh/{__version__}"} - if not self.database: - return {"config": custom_user_agent_config} - connection_str = f"md:{self.database or ''}" + connection_str = "md:" + if self.database: + # Attach single MD database instead of all databases on the account + connection_str += f"{self.database}?attach_mode=single" if self.token: - connection_str += f"?motherduck_token={self.token}" + connection_str += f"{'&' if self.database else '?'}motherduck_token={self.token}" return {"database": connection_str, "config": custom_user_agent_config} diff --git a/tests/core/test_connection_config.py b/tests/core/test_connection_config.py index 8bbfe474f..122a57b04 100644 --- a/tests/core/test_connection_config.py +++ b/tests/core/test_connection_config.py @@ -647,42 +647,66 @@ def _thread_connection(): def test_motherduck_token_mask(make_config): - config = make_config( + config_1 = make_config( + type="motherduck", + token="short", + database="whodunnit", + ) + config_2 = make_config( + type="motherduck", + token="longtoken123456789", + database="whodunnit", + ) + config_3 = make_config( type="motherduck", + token="secret1235", catalogs={ - "test2": DuckDBAttachOptions( - type="motherduck", - path="md:whodunnit?motherduck_token=short", - ), "test1": DuckDBAttachOptions( - type="motherduck", path="md:whodunnit", token="longtoken123456789" + type="motherduck", + path="md:whodunnit", ), }, ) - assert isinstance(config, MotherDuckConnectionConfig) - assert config._mask_motherduck_token(config.catalogs["test1"].path) == "md:whodunnit" + assert isinstance(config_1, MotherDuckConnectionConfig) + assert isinstance(config_2, MotherDuckConnectionConfig) + assert isinstance(config_3, MotherDuckConnectionConfig) + assert config_1._mask_motherduck_token(config_1.database) == "whodunnit" assert ( - config._mask_motherduck_token(config.catalogs["test2"].path) + config_1._mask_motherduck_token(f"md:{config_1.database}?motherduck_token={config_1.token}") == "md:whodunnit?motherduck_token=*****" ) assert ( - config._mask_motherduck_token("?motherduck_token=secret1235") + config_1._mask_motherduck_token( + f"md:{config_1.database}?attach_mode=single&motherduck_token={config_1.token}" + ) + == "md:whodunnit?attach_mode=single&motherduck_token=*****" + ) + assert ( + config_2._mask_motherduck_token(f"md:{config_2.database}?motherduck_token={config_2.token}") + == "md:whodunnit?motherduck_token=******************" + ) + assert ( + config_3._mask_motherduck_token(f"md:?motherduck_token={config_3.token}") + == "md:?motherduck_token=**********" + ) + assert ( + config_1._mask_motherduck_token("?motherduck_token=secret1235") == "?motherduck_token=**********" ) assert ( - config._mask_motherduck_token("md:whodunnit?motherduck_token=short") + config_1._mask_motherduck_token("md:whodunnit?motherduck_token=short") == "md:whodunnit?motherduck_token=*****" ) assert ( - config._mask_motherduck_token("md:whodunnit?motherduck_token=longtoken123456789") + config_1._mask_motherduck_token("md:whodunnit?motherduck_token=longtoken123456789") == "md:whodunnit?motherduck_token=******************" ) assert ( - config._mask_motherduck_token("md:whodunnit?motherduck_token=") + config_1._mask_motherduck_token("md:whodunnit?motherduck_token=") == "md:whodunnit?motherduck_token=" ) - assert config._mask_motherduck_token(":memory:") == ":memory:" + assert config_1._mask_motherduck_token(":memory:") == ":memory:" def test_bigquery(make_config):