-
Notifications
You must be signed in to change notification settings - Fork 13.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor: Minor optimization for updating DatasetDAO columns and metrics #20473
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -163,14 +163,10 @@ def update( | |
""" | ||
|
||
if "columns" in properties: | ||
properties["columns"] = cls.update_columns( | ||
model, properties.get("columns", []), commit=commit | ||
) | ||
properties["columns"] = cls.update_columns(model, properties["columns"]) | ||
|
||
if "metrics" in properties: | ||
properties["metrics"] = cls.update_metrics( | ||
model, properties.get("metrics", []), commit=commit | ||
) | ||
properties["metrics"] = cls.update_metrics(model, properties["metrics"]) | ||
|
||
return super().update(model, properties, commit=commit) | ||
|
||
|
@@ -179,7 +175,6 @@ def update_columns( | |
cls, | ||
model: SqlaTable, | ||
property_columns: List[Dict[str, Any]], | ||
commit: bool = True, | ||
) -> List[TableColumn]: | ||
""" | ||
Creates/updates and/or deletes a list of columns, based on a | ||
|
@@ -190,28 +185,34 @@ def update_columns( | |
- If there are extra columns on the metadata db that are not defined on the List | ||
then we delete. | ||
""" | ||
new_columns = [] | ||
for column in property_columns: | ||
column_id = column.get("id") | ||
|
||
if column_id: | ||
column_obj = db.session.query(TableColumn).get(column_id) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Leverage the columns defined by the model rather than re-querying for each column individually. |
||
column_obj = DatasetDAO.update_column(column_obj, column, commit=commit) | ||
column_by_id = {column.id: column for column in model.columns} | ||
columns = [] | ||
|
||
for properties in property_columns: | ||
if "id" in properties: | ||
columns.append( | ||
DatasetDAO.update_column( | ||
column_by_id[properties["id"]], | ||
properties, | ||
commit=False, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commit once for the dataset. |
||
) | ||
) | ||
else: | ||
column_obj = DatasetDAO.create_column(column, commit=commit) | ||
new_columns.append(column_obj) | ||
# Checks if an exiting column is missing from properties and delete it | ||
for existing_column in model.columns: | ||
if existing_column.id not in [column.id for column in new_columns]: | ||
DatasetDAO.delete_column(existing_column) | ||
return new_columns | ||
|
||
# Note for new columns the primary key is undefined sans a commit/flush. | ||
columns.append(DatasetDAO.create_column(properties, commit=False)) | ||
|
||
for id_ in {obj.id for obj in model.columns} - {obj.id for obj in columns}: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Optimized looping. |
||
DatasetDAO.delete_column(column_by_id[id_], commit=False) | ||
|
||
return columns | ||
|
||
@classmethod | ||
def update_metrics( | ||
cls, | ||
model: SqlaTable, | ||
property_metrics: List[Dict[str, Any]], | ||
commit: bool = True, | ||
) -> List[SqlMetric]: | ||
""" | ||
Creates/updates and/or deletes a list of metrics, based on a | ||
|
@@ -222,21 +223,28 @@ def update_metrics( | |
- If there are extra metrics on the metadata db that are not defined on the List | ||
then we delete. | ||
""" | ||
new_metrics = [] | ||
for metric in property_metrics: | ||
metric_id = metric.get("id") | ||
if metric.get("id"): | ||
metric_obj = db.session.query(SqlMetric).get(metric_id) | ||
metric_obj = DatasetDAO.update_metric(metric_obj, metric, commit=commit) | ||
|
||
metric_by_id = {metric.id: metric for metric in model.metrics} | ||
metrics = [] | ||
|
||
for properties in property_metrics: | ||
if "id" in properties: | ||
metrics.append( | ||
DatasetDAO.update_metric( | ||
metric_by_id[properties["id"]], | ||
properties, | ||
commit=False, | ||
) | ||
) | ||
else: | ||
metric_obj = DatasetDAO.create_metric(metric, commit=commit) | ||
new_metrics.append(metric_obj) | ||
|
||
# Checks if an exiting column is missing from properties and delete it | ||
for existing_metric in model.metrics: | ||
if existing_metric.id not in [metric.id for metric in new_metrics]: | ||
DatasetDAO.delete_metric(existing_metric) | ||
return new_metrics | ||
# Note for new metrics the primary key is undefined sans a commit/flush. | ||
metrics.append(DatasetDAO.create_metric(properties, commit=False)) | ||
|
||
for id_ in {obj.id for obj in model.metrics} - {obj.id for obj in metrics}: | ||
DatasetDAO.delete_column(metric_by_id[id_], commit=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wouldn't SQLA automatically delete orphaned metrics once they are not associated with a dataset? |
||
|
||
return metrics | ||
|
||
@classmethod | ||
def find_dataset_column( | ||
|
@@ -254,23 +262,24 @@ def find_dataset_column( | |
|
||
@classmethod | ||
def update_column( | ||
cls, model: TableColumn, properties: Dict[str, Any], commit: bool = True | ||
) -> Optional[TableColumn]: | ||
cls, | ||
model: TableColumn, | ||
properties: Dict[str, Any], | ||
commit: bool = True, | ||
) -> TableColumn: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The return is not |
||
return DatasetColumnDAO.update(model, properties, commit=commit) | ||
|
||
@classmethod | ||
def create_column( | ||
cls, properties: Dict[str, Any], commit: bool = True | ||
) -> Optional[TableColumn]: | ||
) -> TableColumn: | ||
""" | ||
Creates a Dataset model on the metadata DB | ||
""" | ||
return DatasetColumnDAO.create(properties, commit=commit) | ||
|
||
@classmethod | ||
def delete_column( | ||
cls, model: TableColumn, commit: bool = True | ||
) -> Optional[TableColumn]: | ||
def delete_column(cls, model: TableColumn, commit: bool = True) -> TableColumn: | ||
""" | ||
Deletes a Dataset column | ||
""" | ||
|
@@ -287,24 +296,27 @@ def find_dataset_metric( | |
return db.session.query(SqlMetric).get(metric_id) | ||
|
||
@classmethod | ||
def delete_metric( | ||
cls, model: SqlMetric, commit: bool = True | ||
) -> Optional[TableColumn]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should have been There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why does the command still return the metric when it is deleted? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, prob not needed |
||
def delete_metric(cls, model: SqlMetric, commit: bool = True) -> SqlMetric: | ||
""" | ||
Deletes a Dataset metric | ||
""" | ||
return cls.delete(model, commit=commit) | ||
|
||
@classmethod | ||
def update_metric( | ||
cls, model: SqlMetric, properties: Dict[str, Any], commit: bool = True | ||
) -> Optional[SqlMetric]: | ||
cls, | ||
model: SqlMetric, | ||
properties: Dict[str, Any], | ||
commit: bool = True, | ||
) -> SqlMetric: | ||
return DatasetMetricDAO.update(model, properties, commit=commit) | ||
|
||
@classmethod | ||
def create_metric( | ||
cls, properties: Dict[str, Any], commit: bool = True | ||
) -> Optional[SqlMetric]: | ||
cls, | ||
properties: Dict[str, Any], | ||
commit: bool = True, | ||
) -> SqlMetric: | ||
""" | ||
Creates a Dataset model on the metadata DB | ||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
get(...)
is not needed here per the check on line #165.