Skip to content

Commit

Permalink
fix: improve recovery/uncontamination rate
Browse files Browse the repository at this point in the history
  • Loading branch information
jacobdadams committed Nov 5, 2024
1 parent 77a53b9 commit 54f71e4
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 28 deletions.
35 changes: 16 additions & 19 deletions src/wmrc/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,9 @@ def recovery_rates_by_tonnage(records: helpers.SalesForceRecords) -> pd.Series:
"""Calculates a yearly recovery rate based on the Salesforce records.
Recovery rate is the opposite of the contamination rate (5% contamination = 95% uncontaminated). Rate is
calculated by using the contamination rate to determine contaminated tonnage and comparing that to the total
tonnage handled by facilities reporting a contamination rate.
calculated by computing, per facility, the in-state MSW tonnage recycled and the tonnage received (the recycled
tonnage divided by that facility's recovery rate), and then dividing the summed recycled tonnage by the summed
received tonnage across all facilities.
Args:
records (helpers.SalesForceRecords): Helper object containing the Salesforce records
Expand All @@ -208,34 +209,30 @@ def recovery_rates_by_tonnage(records: helpers.SalesForceRecords) -> pd.Series:
pd.Series: recovery rates per year with index name data_year and series name
"annual_recycling_uncontaminated_rate"
"""
#: First, create a modifier to account for material from out-of-state
#: Convert the out-of-state, MSW, and contamination percentages into fractional modifiers
records.df["in_state_modifier"] = (100 - records.df["Out_of_State__c"]) / 100
records.df["msw_modifier"] = records.df["Municipal_Solid_Waste__c"] / 100
records.df["recovery_rate"] = (100 - records.df["Annual_Recycling_Contamination_Rate__c"]) / 100

#: Calculate contaminated tonnage
records.df["recycling_tons_contaminated"] = (
records.df["Annual_Recycling_Contamination_Rate__c"]
/ 100
* records.df["Combined_Total_of_Material_Recycled__c"]
#: Amount of material recycled
records.df["in_state_msw_recycled"] = (
records.df["Combined_Total_of_Material_Recycled__c"]
* records.df["in_state_modifier"]
* records.df["msw_modifier"]
)

#: Calculate total tonnage from facilities reporting a contamination rate
records.df["recycling_tons_report_contamination_total"] = pd.NA
records.df.loc[~records.df["recycling_tons_contaminated"].isnull(), "recycling_tons_report_contamination_total"] = (
records.df["Combined_Total_of_Material_Recycled__c"] * records.df["in_state_modifier"]
#: Amount of material received derived from recovery rate
records.df["in_state_msw_received_for_recycling"] = (
records.df["in_state_msw_recycled"] / records.df["recovery_rate"]
)

#: Invert to get uncontaminated rate
#: Uncontaminated rates by year
clean_rates = records.df.groupby("Calendar_Year__c").apply(
lambda year_df: (
1
- (
year_df["recycling_tons_contaminated"].sum()
/ year_df["recycling_tons_report_contamination_total"].sum()
)
year_df["in_state_msw_recycled"].sum() / year_df["in_state_msw_received_for_recycling"].sum() * 100
)
* 100
)

clean_rates.name = "annual_recycling_uncontaminated_rate"
clean_rates.index.name = "data_year"
clean_rates.index = clean_rates.index.map(helpers.convert_to_int)
Expand Down
20 changes: 11 additions & 9 deletions tests/test_summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,17 +82,18 @@ def test_recovery_rates_by_tonnage_happy_path(self, mocker):
{
"Calendar_Year__c": [2022, 2022, 2023, 2023],
"Out_of_State__c": [0, 0, 0, 0],
"Annual_Recycling_Contamination_Rate__c": [10, 0, 10, 20],
"Combined_Total_of_Material_Recycled__c": [100, 100, 100, 100],
"Municipal_Solid_Waste__c": [100, 100, 100, 100],
"Annual_Recycling_Contamination_Rate__c": [50, 0, 50, 50],
"Combined_Total_of_Material_Recycled__c": [50, 100, 50, 40],
}
)

output_series = summarize.recovery_rates_by_tonnage(records)

test_df = pd.Series(
{
2022: 95.0,
2023: 85.0,
2022: 75.0,
2023: 50.0,
},
name="annual_recycling_uncontaminated_rate",
)
Expand All @@ -104,20 +105,21 @@ def test_recovery_rates_by_tonnage_uses_out_of_state_modifier(self, mocker):
records = mocker.Mock()
records.df = pd.DataFrame(
{
"facility_name": ["foo", "bar", "foo", "bar"],
# "facility_name": ["foo", "bar", "foo", "bar"],
"Calendar_Year__c": [2022, 2022, 2023, 2023],
"Out_of_State__c": [0, 100, 0, 100],
"Annual_Recycling_Contamination_Rate__c": [10, 0, 10, 20],
"Combined_Total_of_Material_Recycled__c": [100, 100, 100, 100],
"Municipal_Solid_Waste__c": [100, 100, 100, 100],
"Annual_Recycling_Contamination_Rate__c": [50, 0, 50, 50],
"Combined_Total_of_Material_Recycled__c": [50, 100, 50, 40],
}
)

output_series = summarize.recovery_rates_by_tonnage(records)

test_df = pd.Series(
{
2022: 90.0,
2023: 90.0,
2022: 50.0,
2023: 50.0,
},
name="annual_recycling_uncontaminated_rate",
)
Expand Down

0 comments on commit 54f71e4

Please sign in to comment.