From be724cd0fdd26a6833af85bcaf35ae716baeb1df Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Wed, 3 Nov 2021 11:25:30 -0700 Subject: [PATCH] fix: import should accept old keys (#17330) * fix: import should accept old keys * Fix lint * Preserve V1 schema --- superset/databases/commands/export.py | 27 +++++++++--- .../databases/commands/importers/v1/utils.py | 7 +++ superset/databases/schemas.py | 44 ++++++++++++++----- .../databases/commands_tests.py | 41 +++++++++++++++-- .../fixtures/importexport.py | 2 +- 5 files changed, 101 insertions(+), 20 deletions(-) diff --git a/superset/databases/commands/export.py b/superset/databases/commands/export.py index 53cbe67d22512..134bda580c7e5 100644 --- a/superset/databases/commands/export.py +++ b/superset/databases/commands/export.py @@ -39,11 +39,11 @@ def parse_extra(extra_payload: str) -> Dict[str, Any]: logger.info("Unable to decode `extra` field: %s", extra_payload) return {} - # Fix for DBs saved with an invalid ``schemas_allowed_for_file_upload`` - schemas_allowed_for_file_upload = extra.get("schemas_allowed_for_file_upload") - if isinstance(schemas_allowed_for_file_upload, str): - extra["schemas_allowed_for_file_upload"] = json.loads( - schemas_allowed_for_file_upload + # Fix for DBs saved with an invalid ``schemas_allowed_for_csv_upload`` + schemas_allowed_for_csv_upload = extra.get("schemas_allowed_for_csv_upload") + if isinstance(schemas_allowed_for_csv_upload, str): + extra["schemas_allowed_for_csv_upload"] = json.loads( + schemas_allowed_for_csv_upload ) return extra @@ -65,10 +65,25 @@ def _export(model: Database) -> Iterator[Tuple[str, str]]: include_defaults=True, export_uuids=True, ) + + # https://github.com/apache/superset/pull/16756 renamed ``allow_csv_upload`` + # to ``allow_file_upload``, but we can't change the V1 schema + replacements = {"allow_file_upload": "allow_csv_upload"} + # this preserves key order, which is important + payload = {replacements.get(key, key): value for key, value in 
payload.items()} + # TODO (betodealmeida): move this logic to export_to_dict once this # becomes the default export endpoint if payload.get("extra"): - payload["extra"] = parse_extra(payload["extra"]) + extra = payload["extra"] = parse_extra(payload["extra"]) + + # ``schemas_allowed_for_csv_upload`` was also renamed to + # ``schemas_allowed_for_file_upload``, we need to change it back to + # preserve the V1 schema + if "schemas_allowed_for_file_upload" in extra: + extra["schemas_allowed_for_csv_upload"] = extra.pop( + "schemas_allowed_for_file_upload" + ) payload["version"] = EXPORT_VERSION diff --git a/superset/databases/commands/importers/v1/utils.py b/superset/databases/commands/importers/v1/utils.py index 6e016d0e0701f..6704ccd465587 100644 --- a/superset/databases/commands/importers/v1/utils.py +++ b/superset/databases/commands/importers/v1/utils.py @@ -32,6 +32,13 @@ def import_database( return existing config["id"] = existing.id + # https://github.com/apache/superset/pull/16756 renamed ``csv`` to ``file``. + config["allow_file_upload"] = config.pop("allow_csv_upload") + if "schemas_allowed_for_csv_upload" in config["extra"]: + config["extra"]["schemas_allowed_for_file_upload"] = config["extra"].pop( + "schemas_allowed_for_csv_upload" + ) + # TODO (betodealmeida): move this logic to import_from_dict config["extra"] = json.dumps(config["extra"]) diff --git a/superset/databases/schemas.py b/superset/databases/schemas.py index 20a06457a9c3b..81e3d89029966 100644 --- a/superset/databases/schemas.py +++ b/superset/databases/schemas.py @@ -558,15 +558,23 @@ def fix_schemas_allowed_for_csv_upload( self, data: Dict[str, Any], **kwargs: Any ) -> Dict[str, Any]: """ - Fix ``schemas_allowed_for_file_upload`` being a string. - - Due to a bug in the database modal, some databases might have been - saved and exported with a string for ``schemas_allowed_for_file_upload``. + Fixes for ``schemas_allowed_for_csv_upload``. 
""" - schemas_allowed_for_file_upload = data.get("schemas_allowed_for_file_upload") - if isinstance(schemas_allowed_for_file_upload, str): - data["schemas_allowed_for_file_upload"] = json.loads( - schemas_allowed_for_file_upload + # Fix for https://github.com/apache/superset/pull/16756, which temporarily + # changed the V1 schema. We need to support exports made after that PR and + # before this PR. + if "schemas_allowed_for_file_upload" in data: + data["schemas_allowed_for_csv_upload"] = data.pop( + "schemas_allowed_for_file_upload" + ) + + # Fix ``schemas_allowed_for_csv_upload`` being a string. + # Due to a bug in the database modal, some databases might have been + # saved and exported with a string for ``schemas_allowed_for_csv_upload``. + schemas_allowed_for_csv_upload = data.get("schemas_allowed_for_csv_upload") + if isinstance(schemas_allowed_for_csv_upload, str): + data["schemas_allowed_for_csv_upload"] = json.loads( + schemas_allowed_for_csv_upload ) return data @@ -574,11 +582,27 @@ def fix_schemas_allowed_for_csv_upload( metadata_params = fields.Dict(keys=fields.Str(), values=fields.Raw()) engine_params = fields.Dict(keys=fields.Str(), values=fields.Raw()) metadata_cache_timeout = fields.Dict(keys=fields.Str(), values=fields.Integer()) - schemas_allowed_for_file_upload = fields.List(fields.String()) + schemas_allowed_for_csv_upload = fields.List(fields.String()) cost_estimate_enabled = fields.Boolean() class ImportV1DatabaseSchema(Schema): + # pylint: disable=no-self-use, unused-argument + @pre_load + def fix_allow_csv_upload( + self, data: Dict[str, Any], **kwargs: Any + ) -> Dict[str, Any]: + """ + Fix for ``allow_csv_upload`` . + """ + # Fix for https://github.com/apache/superset/pull/16756, which temporarily + # changed the V1 schema. We need to support exports made after that PR and + # before this PR. 
+ if "allow_file_upload" in data: + data["allow_csv_upload"] = data.pop("allow_file_upload") + + return data + database_name = fields.String(required=True) sqlalchemy_uri = fields.String(required=True) password = fields.String(allow_none=True) @@ -587,7 +611,7 @@ class ImportV1DatabaseSchema(Schema): allow_run_async = fields.Boolean() allow_ctas = fields.Boolean() allow_cvas = fields.Boolean() - allow_file_upload = fields.Boolean() + allow_csv_upload = fields.Boolean() extra = fields.Nested(ImportV1DatabaseExtraSchema) uuid = fields.UUID(required=True) version = fields.String(required=True) diff --git a/tests/integration_tests/databases/commands_tests.py b/tests/integration_tests/databases/commands_tests.py index c20d6bbeecac9..0ec96abf9b444 100644 --- a/tests/integration_tests/databases/commands_tests.py +++ b/tests/integration_tests/databases/commands_tests.py @@ -107,7 +107,7 @@ def test_export_database_command(self, mock_g): metadata = yaml.safe_load(contents["databases/examples.yaml"]) assert metadata == ( { - "allow_file_upload": True, + "allow_csv_upload": True, "allow_ctas": True, "allow_cvas": True, "allow_run_async": False, @@ -305,7 +305,7 @@ def test_export_database_command_key_order(self, mock_g): "allow_run_async", "allow_ctas", "allow_cvas", - "allow_file_upload", + "allow_csv_upload", "extra", "uuid", "version", @@ -338,6 +338,41 @@ def test_import_v1_database(self): db.session.delete(database) db.session.commit() + def test_import_v1_database_broken_csv_fields(self): + """ + Test that a database can be imported with broken schema. + + https://github.com/apache/superset/pull/16756 renamed some fields, changing + the V1 schema. This test ensures that we can import databases that were + exported with the broken schema. 
+ """ + broken_config = database_config.copy() + broken_config["allow_file_upload"] = broken_config.pop("allow_csv_upload") + broken_config["extra"] = {"schemas_allowed_for_file_upload": ["upload"]} + + contents = { + "metadata.yaml": yaml.safe_dump(database_metadata_config), + "databases/imported_database.yaml": yaml.safe_dump(broken_config), + } + command = ImportDatabasesCommand(contents) + command.run() + + database = ( + db.session.query(Database).filter_by(uuid=database_config["uuid"]).one() + ) + assert database.allow_file_upload + assert database.allow_ctas + assert database.allow_cvas + assert not database.allow_run_async + assert database.cache_timeout is None + assert database.database_name == "imported_database" + assert database.expose_in_sqllab + assert database.extra == '{"schemas_allowed_for_file_upload": ["upload"]}' + assert database.sqlalchemy_uri == "sqlite:///test.db" + + db.session.delete(database) + db.session.commit() + def test_import_v1_database_multiple(self): """Test that a database can be imported multiple times""" num_databases = db.session.query(Database).count() @@ -359,7 +394,7 @@ def test_import_v1_database_multiple(self): # update allow_file_upload to False new_config = database_config.copy() - new_config["allow_file_upload"] = False + new_config["allow_csv_upload"] = False contents = { "databases/imported_database.yaml": yaml.safe_dump(new_config), "metadata.yaml": yaml.safe_dump(database_metadata_config), diff --git a/tests/integration_tests/fixtures/importexport.py b/tests/integration_tests/fixtures/importexport.py index 098e28d4024db..e4a2ec29d8611 100644 --- a/tests/integration_tests/fixtures/importexport.py +++ b/tests/integration_tests/fixtures/importexport.py @@ -347,7 +347,7 @@ "timestamp": "2021-03-30T20:37:54.791187+00:00", } database_config: Dict[str, Any] = { - "allow_file_upload": True, + "allow_csv_upload": True, "allow_ctas": True, "allow_cvas": True, "allow_run_async": False,