From 0e2e4ceaa97d942330b77c2bddbbdbc2a5f78113 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 10 Jan 2022 15:15:20 +0000 Subject: [PATCH 1/5] fix: Change default SECRET_KEY, improve docs and banner warning on default --- docs/src/pages/docs/installation/configuring.mdx | 5 +++-- superset/config.py | 7 +++++-- superset/constants.py | 1 + superset/initialization/__init__.py | 14 ++++++++++++++ 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/docs/src/pages/docs/installation/configuring.mdx b/docs/src/pages/docs/installation/configuring.mdx index 922f97d7a2d4b..f5ae5d2d12f8b 100644 --- a/docs/src/pages/docs/installation/configuring.mdx +++ b/docs/src/pages/docs/installation/configuring.mdx @@ -21,7 +21,7 @@ SUPERSET_WEBSERVER_PORT = 8088 # Flask App Builder configuration # Your App secret key -SECRET_KEY = '\2\1thisismyscretkey\1\2\e\y\y\h' +SECRET_KEY = 'USE_YOUR_OWN_SECURE_RANDOM_KEY' # The SQLAlchemy connection string to your database backend # This connection defines the path to the database that stores your @@ -56,7 +56,8 @@ for more information on how to configure it. Make sure to change: - `SQLALCHEMY_DATABASE_URI`: by default it is stored at ~/.superset/superset.db -- `SECRET_KEY`: to a long random string +- `SECRET_KEY`: Use a strong complex alphanumeric string and use a tool + to help you generate a sufficiently random sequence, ex: openssl rand -base64 42" If you need to exempt endpoints from CSRF (e.g. if you are running a custom auth postback endpoint), you can add the endpoints to `WTF_CSRF_EXEMPT_LIST`: diff --git a/superset/config.py b/superset/config.py index 8bee76bee0ea5..952bd018c3a8a 100644 --- a/superset/config.py +++ b/superset/config.py @@ -41,6 +41,7 @@ from typing_extensions import Literal from werkzeug.local import LocalProxy +from superset.constants import CHANGE_ME_SECRET_KEY from superset.jinja_context import BaseTemplateProcessor from superset.stats_logger import DummyStatsLogger from superset.typing import CacheConfig @@ -160,8 +161,10 @@ def _try_json_readsha(filepath: str, length: int) -> Optional[str]: SQLALCHEMY_TRACK_MODIFICATIONS = False # --------------------------------------------------------- -# Your App secret key -SECRET_KEY = "\2\1thisismyscretkey\1\2\\e\\y\\y\\h" +# Your App secret key. Make sure you override it on superset_config.py. +# Use a strong complex alphanumeric string and use a tool to help you generate +# a sufficiently random sequence, ex: openssl rand -base64 42" +SECRET_KEY = CHANGE_ME_SECRET_KEY # The SQLAlchemy connection string. SQLALCHEMY_DATABASE_URI = "sqlite:///" + os.path.join(DATA_DIR, "superset.db") diff --git a/superset/constants.py b/superset/constants.py index bffb02ceea114..1e02bcc0cba71 100644 --- a/superset/constants.py +++ b/superset/constants.py @@ -22,6 +22,7 @@ NULL_STRING = "" +CHANGE_ME_SECRET_KEY = "CHANGE_ME_TO_A_COMPLEX_RANDOM_SECRET" # UUID for the examples database EXAMPLES_DB_UUID = "a2dc77af-e654-49bb-b321-40f6b559a1ee" diff --git a/superset/initialization/__init__.py b/superset/initialization/__init__.py index e33c038b11a78..33022209ed390 100644 --- a/superset/initialization/__init__.py +++ b/superset/initialization/__init__.py @@ -29,6 +29,7 @@ from werkzeug.middleware.proxy_fix import ProxyFix from superset.connectors.connector_registry import ConnectorRegistry +from superset.constants import CHANGE_ME_SECRET_KEY from superset.extensions import ( _event_logger, APP_DIR, @@ -572,12 +573,25 @@ def init_app_in_ctx(self) -> None: self.init_views() + def check_secret_key(self) -> None: + if self.config["SECRET_KEY"] == CHANGE_ME_SECRET_KEY: + logger.warning(80 * "-" + "\n" + 36 * " " + "WARNING\n" + 80 * "-") + logger.warning( + "A Default SECRET_KEY was detected please use superset_config.py " + "to override it.\n" + "Use a strong complex alphanumeric string and use a tool to help" + " you generate \n" + "a sufficiently random sequence, ex: openssl rand -base64 42" + ) + logger.warning(80 * "-" + "\n" + 80 * "-") + def init_app(self) -> None: """ Main entry point which will delegate to other methods in order to fully init the app """ self.pre_init() + self.check_secret_key() # Configuration of logging must be done first to apply the formatter properly self.configure_logging() # Configuration of feature_flags must be done first to allow init features From e0764cf604705b9e7b309834fe2bb405d288567a Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 10 Jan 2022 17:31:43 +0000 Subject: [PATCH 2/5] lint --- superset/initialization/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/superset/initialization/__init__.py b/superset/initialization/__init__.py index 33022209ed390..d2e923c9addd2 100644 --- a/superset/initialization/__init__.py +++ b/superset/initialization/__init__.py @@ -575,7 +575,9 @@ def init_app_in_ctx(self) -> None: def check_secret_key(self) -> None: if self.config["SECRET_KEY"] == CHANGE_ME_SECRET_KEY: - logger.warning(80 * "-" + "\n" + 36 * " " + "WARNING\n" + 80 * "-") + top_banner = 80 * "-" + "\n" + 36 * " " + "WARNING\n" + 80 * "-" + bottom_banner = 80 * "-" + "\n" + 80 * "-" + logger.warning(top_banner) logger.warning( "A Default SECRET_KEY was detected please use superset_config.py " "to override it.\n" @@ -583,7 +585,7 @@ def check_secret_key(self) -> None: " you generate \n" "a sufficiently random sequence, ex: openssl rand -base64 42" ) - logger.warning(80 * "-" + "\n" + 80 * "-") + logger.warning(bottom_banner) def init_app(self) -> None: """ From 3366d429559821cedb1f0cabc8e4bf2a43d9f796 Mon Sep 17 00:00:00 2001 From: Daniel Vaz Gaspar Date: Tue, 11 Jan 2022 09:23:00 +0000 Subject: [PATCH 3/5] Update superset/initialization/__init__.py Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com> --- superset/initialization/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/initialization/__init__.py b/superset/initialization/__init__.py index d2e923c9addd2..95e260d2a41de 100644 --- a/superset/initialization/__init__.py +++ b/superset/initialization/__init__.py @@ -579,7 +579,7 @@ def check_secret_key(self) -> None: bottom_banner = 80 * "-" + "\n" + 80 * "-" logger.warning(top_banner) logger.warning( - "A Default SECRET_KEY was detected please use superset_config.py " + "A Default SECRET_KEY was detected, please use superset_config.py " "to override it.\n" "Use a strong complex alphanumeric string and use a tool to help" " you generate \n" From efe32a69bb5c9c6898f0a8b7c268ff61fa4f2daf Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Tue, 11 Jan 2022 12:41:31 +0000 Subject: [PATCH 4/5] add a secret migration procedure, update UPDATING --- UPDATING.md | 1 + superset/cli.py | 28 +++++++++ superset/utils/encrypt.py | 120 +++++++++++++++++++++++++++++++++++++- 3 files changed, 146 insertions(+), 3 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 46ae027f6d397..60b39cf409220 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -26,6 +26,7 @@ assists people when migrating to a new version. ### Breaking Changes +- [17984](https://github.com/apache/superset/pull/17984): Default Flask SECRET_KEY has changed for security reasons. You should always override with your own secret. Set `PREVIOUS_SECRET_KEY` with your previous key and use `superset re-encrypt-secrets` to rotate you current secrets - [17556](https://github.com/apache/superset/pull/17556): Bumps mysqlclient from v1 to v2 - [15254](https://github.com/apache/superset/pull/15254): Previously `QUERY_COST_FORMATTERS_BY_ENGINE`, `SQL_VALIDATORS_BY_ENGINE` and `SCHEDULED_QUERIES` were expected to be defined in the feature flag dictionary in the `config.py` file. These should now be defined as a top-level config, with the feature flag dictionary being reserved for boolean only values. - [17290](https://github.com/apache/superset/pull/17290): Bumps pandas to `1.3.4` and pyarrow to `5.0.0` diff --git a/superset/cli.py b/superset/cli.py index bef7dd04cb02b..721d26b8c3329 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -40,6 +40,7 @@ from superset.extensions import celery_app, db from superset.utils import core as utils from superset.utils.celery import session_scope +from superset.utils.encrypt import SecretsMigrator from superset.utils.urls import get_url_path logger = logging.getLogger(__name__) @@ -870,3 +871,30 @@ def update_api_docs() -> None: json.dump(api_spec.to_dict(), outfile, sort_keys=True, indent=2) else: click.secho("API version not found", err=True) + + +@superset.command() +@with_appcontext +@click.option( + "--previous_secret_key", + "-a", + required=False, + help="An optional previous secret key, if PREVIOUS_SECRET_KEY is not set on the config", +) +def re_encrypt_secrets(previous_secret_key: Optional[str] = None) -> None: + previous_secret_key = previous_secret_key or current_app.config.get( + "PREVIOUS_SECRET_KEY" + ) + if previous_secret_key is None: + click.secho("A previous secret key must be provided", err=True) + sys.exit(1) + secrets_migrator = SecretsMigrator(previous_secret_key=previous_secret_key) + try: + secrets_migrator.run() + except ValueError as exc: + click.secho( + f"An error occurred, " + f"probably an invalid previoud secret key was provided. Error:[{exc}]", + err=True, + ) + sys.exit(1) diff --git a/superset/utils/encrypt.py b/superset/utils/encrypt.py index 240ca5aa8bf91..4acd912f5c395 100644 --- a/superset/utils/encrypt.py +++ b/superset/utils/encrypt.py @@ -14,13 +14,17 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import logging from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional from flask import Flask -from sqlalchemy import TypeDecorator +from sqlalchemy import text, TypeDecorator +from sqlalchemy.engine import Connection, Dialect, RowProxy from sqlalchemy_utils import EncryptedType +logger = logging.getLogger(__name__) + class AbstractEncryptedFieldAdapter(ABC): # pylint: disable=too-few-public-methods @abstractmethod @@ -28,7 +32,7 @@ def create( self, app_config: Optional[Dict[str, Any]], *args: List[Any], - **kwargs: Optional[Dict[str, Any]] + **kwargs: Optional[Dict[str, Any]], ) -> TypeDecorator: pass @@ -40,7 +44,7 @@ def create( self, app_config: Optional[Dict[str, Any]], *args: List[Any], - **kwargs: Optional[Dict[str, Any]] + **kwargs: Optional[Dict[str, Any]], ) -> TypeDecorator: if app_config: return EncryptedType(*args, app_config["SECRET_KEY"], **kwargs) @@ -66,3 +70,113 @@ def create( return self._concrete_type_adapter.create(self._config, *args, **kwargs) raise Exception("App not initialized yet. Please call init_app first") + + +class SecretsMigrator: + def __init__(self, previous_secret_key: str) -> None: + from superset import db + + self.db = db + self._previous_secret_key = previous_secret_key + self._dialect: Dialect = db.engine.url.get_dialect() + + def _discover_encrypted_fields(self) -> Dict[str, Dict[str, EncryptedType]]: + """ + Iterates over SqlAlchemy's metadata, looking for EncryptedType + columns along the way. Builds up a dict of + table_name -> dict of col_name: enc type instance + :return: + """ + meta_info: Dict[str, Any] = {} + + for table_name, table in self.db.metadata.tables.items(): + for col_name, col in table.columns.items(): + if isinstance(col.type, EncryptedType): + cols = meta_info.get(table_name, {}) + cols[col_name] = col.type + meta_info[table_name] = cols + + return meta_info + + @staticmethod + def _read_bytes(col_name: str, value: Any) -> Optional[bytes]: + if value is None or isinstance(value, bytes): + return value + # Note that the Postgres Driver returns memoryview's for BLOB types + elif isinstance(value, memoryview): + return value.tobytes() + elif isinstance(value, str): + return bytes(value.encode("utf8")) + + # Just bail if we haven't seen this type before... + raise ValueError(f"DB column {col_name} has unknown type: {type(value)}") + + def _select_columns_from_table( + self, conn: Connection, column_names: List[str], table_name: str + ) -> RowProxy: + return conn.execute(f"SELECT id, {','.join(column_names)} FROM {table_name}") + + def _re_encrypt_row( + self, + conn: Connection, + row: RowProxy, + table_name: str, + columns: Dict[str, EncryptedType], + ) -> None: + """ + Re encrypts all columns in a Row + :param row: Current row to reencrypt + :param columns: Meta info from columns + """ + re_encrypted_columns = {} + + for column_name, encrypted_type in columns.items(): + previous_encrypted_type = EncryptedType( + type_in=encrypted_type.underlying_type, key=self._previous_secret_key + ) + try: + unencrypted_value = previous_encrypted_type.process_result_value( + self._read_bytes(column_name, row[column_name]), self._dialect + ) + except ValueError as exc: + # Failed to unencrypt + try: + encrypted_type.process_result_value( + self._read_bytes(column_name, row[column_name]), self._dialect + ) + logger.info( + "Current secret is able to decrypt value on column [%s.%s]," + " nothing to do", + table_name, + column_name, + ) + return + except Exception: + raise exc + + re_encrypted_columns[column_name] = encrypted_type.process_bind_param( + unencrypted_value, self._dialect, + ) + + set_cols = ",".join( + [f"{name} = :{name}" for name in re_encrypted_columns.keys()] + ) + logger.info("Processing table: %s", table_name) + conn.execute( + text(f"UPDATE {table_name} SET {set_cols} WHERE id = :id"), + id=row["id"], + **re_encrypted_columns, + ) + + def run(self) -> None: + encrypted_meta_info = self._discover_encrypted_fields() + + with self.db.engine.begin() as conn: + logger.info("Collecting info for re encryption") + for table_name, columns in encrypted_meta_info.items(): + column_names = list(columns.keys()) + rows = self._select_columns_from_table(conn, column_names, table_name) + + for row in rows: + self._re_encrypt_row(conn, row, table_name, columns) + logger.info("All tables processed") From 1ccd34af06827b6b6d3b07f8e6a9bc7ca9f10bc0 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Tue, 11 Jan 2022 13:25:04 +0000 Subject: [PATCH 5/5] fix lint --- UPDATING.md | 2 +- superset/cli.py | 3 ++- superset/utils/encrypt.py | 23 ++++++++++++----------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 60b39cf409220..291c85e0fabf5 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -26,7 +26,7 @@ assists people when migrating to a new version. ### Breaking Changes -- [17984](https://github.com/apache/superset/pull/17984): Default Flask SECRET_KEY has changed for security reasons. You should always override with your own secret. Set `PREVIOUS_SECRET_KEY` with your previous key and use `superset re-encrypt-secrets` to rotate you current secrets +- [17984](https://github.com/apache/superset/pull/17984): Default Flask SECRET_KEY has changed for security reasons. You should always override with your own secret. Set `PREVIOUS_SECRET_KEY` (ex: PREVIOUS_SECRET_KEY = "\2\1thisismyscretkey\1\2\\e\\y\\y\\h") with your previous key and use `superset re-encrypt-secrets` to rotate you current secrets - [17556](https://github.com/apache/superset/pull/17556): Bumps mysqlclient from v1 to v2 - [15254](https://github.com/apache/superset/pull/15254): Previously `QUERY_COST_FORMATTERS_BY_ENGINE`, `SQL_VALIDATORS_BY_ENGINE` and `SCHEDULED_QUERIES` were expected to be defined in the feature flag dictionary in the `config.py` file. These should now be defined as a top-level config, with the feature flag dictionary being reserved for boolean only values. - [17290](https://github.com/apache/superset/pull/17290): Bumps pandas to `1.3.4` and pyarrow to `5.0.0` diff --git a/superset/cli.py b/superset/cli.py index 721d26b8c3329..98d43cc47cb2a 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -879,7 +879,8 @@ def update_api_docs() -> None: "--previous_secret_key", "-a", required=False, - help="An optional previous secret key, if PREVIOUS_SECRET_KEY is not set on the config", + help="An optional previous secret key, if PREVIOUS_SECRET_KEY " + "is not set on the config", ) def re_encrypt_secrets(previous_secret_key: Optional[str] = None) -> None: previous_secret_key = previous_secret_key or current_app.config.get( diff --git a/superset/utils/encrypt.py b/superset/utils/encrypt.py index 4acd912f5c395..0fa2a6d177a06 100644 --- a/superset/utils/encrypt.py +++ b/superset/utils/encrypt.py @@ -74,13 +74,13 @@ def create( class SecretsMigrator: def __init__(self, previous_secret_key: str) -> None: - from superset import db + from superset import db # pylint: disable=import-outside-toplevel - self.db = db + self._db = db self._previous_secret_key = previous_secret_key self._dialect: Dialect = db.engine.url.get_dialect() - def _discover_encrypted_fields(self) -> Dict[str, Dict[str, EncryptedType]]: + def discover_encrypted_fields(self) -> Dict[str, Dict[str, EncryptedType]]: """ Iterates over SqlAlchemy's metadata, looking for EncryptedType columns along the way. Builds up a dict of @@ -89,7 +89,7 @@ def _discover_encrypted_fields(self) -> Dict[str, Dict[str, EncryptedType]]: """ meta_info: Dict[str, Any] = {} - for table_name, table in self.db.metadata.tables.items(): + for table_name, table in self._db.metadata.tables.items(): for col_name, col in table.columns.items(): if isinstance(col.type, EncryptedType): cols = meta_info.get(table_name, {}) @@ -103,16 +103,17 @@ def _read_bytes(col_name: str, value: Any) -> Optional[bytes]: if value is None or isinstance(value, bytes): return value # Note that the Postgres Driver returns memoryview's for BLOB types - elif isinstance(value, memoryview): + if isinstance(value, memoryview): return value.tobytes() - elif isinstance(value, str): + if isinstance(value, str): return bytes(value.encode("utf8")) # Just bail if we haven't seen this type before... raise ValueError(f"DB column {col_name} has unknown type: {type(value)}") + @staticmethod def _select_columns_from_table( - self, conn: Connection, column_names: List[str], table_name: str + conn: Connection, column_names: List[str], table_name: str ) -> RowProxy: return conn.execute(f"SELECT id, {','.join(column_names)} FROM {table_name}") @@ -152,14 +153,14 @@ def _re_encrypt_row( ) return except Exception: - raise exc + raise Exception from exc re_encrypted_columns[column_name] = encrypted_type.process_bind_param( unencrypted_value, self._dialect, ) set_cols = ",".join( - [f"{name} = :{name}" for name in re_encrypted_columns.keys()] + [f"{name} = :{name}" for name in list(re_encrypted_columns.keys())] ) logger.info("Processing table: %s", table_name) conn.execute( @@ -169,9 +170,9 @@ def _re_encrypt_row( ) def run(self) -> None: - encrypted_meta_info = self._discover_encrypted_fields() + encrypted_meta_info = self.discover_encrypted_fields() - with self.db.engine.begin() as conn: + with self._db.engine.begin() as conn: logger.info("Collecting info for re encryption") for table_name, columns in encrypted_meta_info.items(): column_names = list(columns.keys())