Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parameterize adding a column in the FERC1 transform & ensure _correction records end up in the calculation component table #3409

Merged
merged 8 commits into from
Feb 20, 2024
249 changes: 128 additions & 121 deletions src/pudl/output/ferc1.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,15 +1217,15 @@ def _out_ferc1__detailed_tags(_core_ferc1__table_dimensions) -> pd.DataFrame:
def _get_tags(
file_name: str, _core_ferc1__table_dimensions: pd.DataFrame
) -> pd.DataFrame:
"""Grab tags from a stored CSV file and apply :func:`make_calculation_dimensions_explicit`."""
"""Grab tags from a stored CSV file and apply :func:`make_xbrl_factoid_dimensions_explicit`."""
tags_csv = importlib.resources.files("pudl.package_data.ferc1") / file_name
tags_df = (
pd.read_csv(tags_csv)
.drop_duplicates()
.dropna(subset=["table_name", "xbrl_factoid"], how="any")
.astype(pd.StringDtype())
.pipe(
pudl.transform.ferc1.make_calculation_dimensions_explicit,
pudl.transform.ferc1.make_xbrl_factoid_dimensions_explicit,
_core_ferc1__table_dimensions,
dimensions=["utility_type", "plant_function", "plant_status"],
)
Expand All @@ -1252,7 +1252,7 @@ def _aggregatable_dimension_tags(
.assign(**{dim: pd.NA for dim in dimensions})
.astype(pd.StringDtype())
.pipe(
pudl.transform.ferc1.make_calculation_dimensions_explicit,
pudl.transform.ferc1.make_xbrl_factoid_dimensions_explicit,
_core_ferc1__table_dimensions,
dimensions=dimensions,
)
Expand Down Expand Up @@ -1330,117 +1330,119 @@ def exploded_tables_asset(
return exploded_tables_asset


# Keyword-argument sets for the exploded FERC Form 1 table assets.
#
# ``create_exploded_table_assets`` unpacks each dictionary below into
# ``exploded_table_asset_factory(**kwargs)``, so every entry describes one asset.
# Each entry provides:
#   * ``root_table``: name of the table the entry is keyed on.
#   * ``table_names``: the tables included in the entry (the root table is listed
#     first in every entry below).
#   * ``group_metric_checks``: the ``EXPLOSION_CALCULATION_TOLERANCES`` value looked
#     up with the root table's name.
#   * ``seed_nodes``: a list of ``NodeId`` tuples; each entry below has exactly one,
#     with ``utility_type="total"`` and NA plant status/function.
#   * ``off_by_facts``: a list of ``OffByFactoid`` records (empty for two of the
#     three entries).
# NOTE(review): the precise meaning of ``seed_nodes`` and ``off_by_facts`` is defined
# by ``exploded_table_asset_factory``, which is not visible here -- confirm there.
EXPLOSION_ARGS = [
{
"root_table": "core_ferc1__yearly_income_statements_sched114",
"table_names": [
"core_ferc1__yearly_income_statements_sched114",
"core_ferc1__yearly_depreciation_summary_sched336",
"core_ferc1__yearly_operating_expenses_sched320",
"core_ferc1__yearly_operating_revenues_sched300",
],
"group_metric_checks": EXPLOSION_CALCULATION_TOLERANCES[
"core_ferc1__yearly_income_statements_sched114"
],
"seed_nodes": [
NodeId(
table_name="core_ferc1__yearly_income_statements_sched114",
xbrl_factoid="net_income_loss",
utility_type="total",
plant_status=pd.NA,
plant_function=pd.NA,
),
],
"off_by_facts": [],
},
{
"root_table": "core_ferc1__yearly_balance_sheet_assets_sched110",
"table_names": [
"core_ferc1__yearly_balance_sheet_assets_sched110",
"core_ferc1__yearly_utility_plant_summary_sched200",
"core_ferc1__yearly_plant_in_service_sched204",
"core_ferc1__yearly_depreciation_by_function_sched219",
],
"group_metric_checks": EXPLOSION_CALCULATION_TOLERANCES[
"core_ferc1__yearly_balance_sheet_assets_sched110"
],
"seed_nodes": [
NodeId(
table_name="core_ferc1__yearly_balance_sheet_assets_sched110",
xbrl_factoid="assets_and_other_debits",
utility_type="total",
plant_status=pd.NA,
plant_function=pd.NA,
)
],
"off_by_facts": [
OffByFactoid(
"core_ferc1__yearly_utility_plant_summary_sched200",
"utility_plant_in_service_classified_and_property_under_capital_leases",
"electric",
pd.NA,
pd.NA,
"core_ferc1__yearly_utility_plant_summary_sched200",
"utility_plant_in_service_completed_construction_not_classified",
"electric",
pd.NA,
pd.NA,
),
OffByFactoid(
"core_ferc1__yearly_utility_plant_summary_sched200",
"utility_plant_in_service_classified_and_property_under_capital_leases",
"electric",
pd.NA,
pd.NA,
"core_ferc1__yearly_utility_plant_summary_sched200",
"utility_plant_in_service_property_under_capital_leases",
"electric",
pd.NA,
pd.NA,
),
OffByFactoid(
"core_ferc1__yearly_utility_plant_summary_sched200",
"depreciation_utility_plant_in_service",
"electric",
pd.NA,
pd.NA,
"core_ferc1__yearly_utility_plant_summary_sched200",
"amortization_of_other_utility_plant_utility_plant_in_service",
"electric",
pd.NA,
pd.NA,
),
],
},
{
"root_table": "core_ferc1__yearly_balance_sheet_liabilities_sched110",
"table_names": [
"core_ferc1__yearly_balance_sheet_liabilities_sched110",
"core_ferc1__yearly_retained_earnings_sched118",
],
"group_metric_checks": EXPLOSION_CALCULATION_TOLERANCES[
"core_ferc1__yearly_balance_sheet_liabilities_sched110"
],
"seed_nodes": [
NodeId(
table_name="core_ferc1__yearly_balance_sheet_liabilities_sched110",
xbrl_factoid="liabilities_and_other_credits",
utility_type="total",
plant_status=pd.NA,
plant_function=pd.NA,
)
],
"off_by_facts": [],
},
]


def create_exploded_table_assets() -> list[AssetsDefinition]:
"""Create a list of exploded FERC Form 1 assets.

Returns:
A list of :class:`AssetsDefinition` where each asset is an exploded FERC Form 1
table.
"""
explosion_args = [
{
"root_table": "core_ferc1__yearly_income_statements_sched114",
"table_names": [
"core_ferc1__yearly_income_statements_sched114",
"core_ferc1__yearly_depreciation_summary_sched336",
"core_ferc1__yearly_operating_expenses_sched320",
"core_ferc1__yearly_operating_revenues_sched300",
],
"group_metric_checks": EXPLOSION_CALCULATION_TOLERANCES[
"core_ferc1__yearly_income_statements_sched114"
],
"seed_nodes": [
NodeId(
table_name="core_ferc1__yearly_income_statements_sched114",
xbrl_factoid="net_income_loss",
utility_type="total",
plant_status=pd.NA,
plant_function=pd.NA,
),
],
"off_by_facts": [],
},
{
"root_table": "core_ferc1__yearly_balance_sheet_assets_sched110",
"table_names": [
"core_ferc1__yearly_balance_sheet_assets_sched110",
"core_ferc1__yearly_utility_plant_summary_sched200",
"core_ferc1__yearly_plant_in_service_sched204",
"core_ferc1__yearly_depreciation_by_function_sched219",
],
"group_metric_checks": EXPLOSION_CALCULATION_TOLERANCES[
"core_ferc1__yearly_balance_sheet_assets_sched110"
],
"seed_nodes": [
NodeId(
table_name="core_ferc1__yearly_balance_sheet_assets_sched110",
xbrl_factoid="assets_and_other_debits",
utility_type="total",
plant_status=pd.NA,
plant_function=pd.NA,
)
],
"off_by_facts": [
OffByFactoid(
"core_ferc1__yearly_utility_plant_summary_sched200",
"utility_plant_in_service_classified_and_property_under_capital_leases",
"electric",
pd.NA,
pd.NA,
"core_ferc1__yearly_utility_plant_summary_sched200",
"utility_plant_in_service_completed_construction_not_classified",
"electric",
pd.NA,
pd.NA,
),
OffByFactoid(
"core_ferc1__yearly_utility_plant_summary_sched200",
"utility_plant_in_service_classified_and_property_under_capital_leases",
"electric",
pd.NA,
pd.NA,
"core_ferc1__yearly_utility_plant_summary_sched200",
"utility_plant_in_service_property_under_capital_leases",
"electric",
pd.NA,
pd.NA,
),
OffByFactoid(
"core_ferc1__yearly_utility_plant_summary_sched200",
"depreciation_utility_plant_in_service",
"electric",
pd.NA,
pd.NA,
"core_ferc1__yearly_utility_plant_summary_sched200",
"amortization_of_other_utility_plant_utility_plant_in_service",
"electric",
pd.NA,
pd.NA,
),
],
},
{
"root_table": "core_ferc1__yearly_balance_sheet_liabilities_sched110",
"table_names": [
"core_ferc1__yearly_balance_sheet_liabilities_sched110",
"core_ferc1__yearly_retained_earnings_sched118",
],
"group_metric_checks": EXPLOSION_CALCULATION_TOLERANCES[
"core_ferc1__yearly_balance_sheet_liabilities_sched110"
],
"seed_nodes": [
NodeId(
table_name="core_ferc1__yearly_balance_sheet_liabilities_sched110",
xbrl_factoid="liabilities_and_other_credits",
utility_type="total",
plant_status=pd.NA,
plant_function=pd.NA,
)
],
"off_by_facts": [],
},
]
return [exploded_table_asset_factory(**kwargs) for kwargs in explosion_args]
return [exploded_table_asset_factory(**kwargs) for kwargs in EXPLOSION_ARGS]


exploded_ferc1_assets = create_exploded_table_assets()
Expand Down Expand Up @@ -1747,6 +1749,18 @@ def calc_idx(self: Self) -> list[str]:
"""Primary key columns for calculations in this explosion."""
return [col for col in list(NodeId._fields) if col in self.exploded_pks]

def prep_table_to_explode(
    self: Self, table_name: str, table_df: pd.DataFrame
) -> pd.DataFrame:
    """Label a table with its name and standardize its factoid column name.

    Args:
        table_name: Key used to look up the table's class in
            ``pudl.transform.ferc1.FERC1_TFR_CLASSES``; also written into a
            ``table_name`` column on every row.
        table_df: The table to prepare for explosion.

    Returns:
        A new dataframe with a ``table_name`` column added and the table's
        factoid column renamed to ``xbrl_factoid``.
    """
    # Instantiate the class registered for this table only to read the name of
    # the column that holds its factoids.
    table_tfr = pudl.transform.ferc1.FERC1_TFR_CLASSES[table_name]()
    factoid_col = table_tfr.params.xbrl_factoid_name
    labeled = table_df.assign(table_name=table_name)
    return labeled.rename(columns={factoid_col: "xbrl_factoid"})

def boom(self: Self, tables_to_explode: dict[str, pd.DataFrame]) -> pd.DataFrame:
"""Explode a set of nested tables.

Expand Down Expand Up @@ -1800,14 +1814,8 @@ def initial_explosion_concatenation(
explosion_tables = []
# GRAB/PREP EACH TABLE
for table_name, table_df in tables_to_explode.items():
xbrl_factoid_name = pudl.transform.ferc1.FERC1_TFR_CLASSES[
table_name
]().params.xbrl_factoid_name
tbl = table_df.assign(table_name=table_name).rename(
columns={xbrl_factoid_name: "xbrl_factoid"}
)
tbl = self.prep_table_to_explode(table_name, table_df)
explosion_tables.append(tbl)

exploded = pd.concat(explosion_tables)

# Identify which dimensions apply to the current explosion -- not all collections
Expand Down Expand Up @@ -2724,11 +2732,7 @@ def forest_as_table(self: Self) -> pd.DataFrame:
# Convert them into the first layer of the dataframe:
layer0_df = pd.DataFrame(layer0_nodes).rename(columns=lambda x: x + "_layer0")

return (
self._add_layers_to_forest_as_table(df=layer0_df)
.dropna(axis="columns", how="all")
.convert_dtypes()
)
return self._add_layers_to_forest_as_table(df=layer0_df).convert_dtypes()

def _add_layers_to_forest_as_table(self: Self, df: pd.DataFrame) -> pd.DataFrame:
"""Recursively add additional layers of nodes from the forest to the table.
Expand All @@ -2753,7 +2757,7 @@ def _add_layers_to_forest_as_table(self: Self, df: pd.DataFrame) -> pd.DataFrame
parent_nodes = list(
df[parent_cols]
.drop_duplicates()
.dropna(how="all")
.dropna(how="all", axis="rows")
.rename(columns=lambda x: x.removesuffix(suffix))
.itertuples(name="NodeId", index=False)
)
Expand Down Expand Up @@ -2885,7 +2889,10 @@ def _propagate_tags_to_corrections(annotated_forest: nx.DiGraph) -> nx.DiGraph:
# for every correction node, we assume that that node's parent tags can apply
parents = list(annotated_forest.predecessors(correction_node))
# all correction records should have a parent and only one
assert len(parents) == 1
if len(parents) != 1:
raise AssertionError(
f"Found more than one parent node for {correction_node=}\n{parents=}"
cmgosnell marked this conversation as resolved.
Show resolved Hide resolved
)
parent = parents[0]
correction_tags[correction_node] = {
"tags": existing_tags.get(parent, {})
Expand Down
2 changes: 1 addition & 1 deletion src/pudl/transform/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1236,7 +1236,7 @@ def rename_columns(
logger.warning(
f"{self.table_id.value}: Attempting to rename columns which are not "
"present in the dataframe.\n"
f"Missing columns: {missing_cols}"
f"Missing columns: {missing_cols}\nExisting Columns: {df.columns}"
)
return df.rename(columns=params.columns)

Expand Down
Loading