diff --git a/src/wmrc/summarize.py b/src/wmrc/summarize.py index 0771d24..ff85ded 100644 --- a/src/wmrc/summarize.py +++ b/src/wmrc/summarize.py @@ -198,8 +198,9 @@ def recovery_rates_by_tonnage(records: helpers.SalesForceRecords) -> pd.Series: """Calculates a yearly recovery rate based on the Salesforce records. Recovery rate is opposite of contaminated rate (5% contamination = 95% uncontaminated). Rate is - calculated by using the contamination rate to determine contaminated tonnage and comparing that to the total - tonnage handled by facilities reporting a contamination rate. + calculated by calculating the total in-state MSW recycled per facility and the total received, which comes from + dividing that amount by the recovery rate per facility, and then dividing the sums of those two values across all + facilities. Args: records (helpers.SalesForceRecords): Helper object containing the Salesforce records @@ -208,34 +209,30 @@ def recovery_rates_by_tonnage(records: helpers.SalesForceRecords) -> pd.Series: pd.Series: recovery rates per year with index name data_year and series name "annual_recycling_uncontaminated_rate" """ - #: First, create a modifier to account for material from out-of-state + #: Create our various modifiers records.df["in_state_modifier"] = (100 - records.df["Out_of_State__c"]) / 100 + records.df["msw_modifier"] = records.df["Municipal_Solid_Waste__c"] / 100 + records.df["recovery_rate"] = (100 - records.df["Annual_Recycling_Contamination_Rate__c"]) / 100 - #: Calculate contaminated tonnage - records.df["recycling_tons_contaminated"] = ( - records.df["Annual_Recycling_Contamination_Rate__c"] - / 100 - * records.df["Combined_Total_of_Material_Recycled__c"] + #: Amount of material recycled + records.df["in_state_msw_recycled"] = ( + records.df["Combined_Total_of_Material_Recycled__c"] * records.df["in_state_modifier"] + * records.df["msw_modifier"] ) - #: Calculate total tonnage from facilities reporting a contamination rate - records.df["recycling_tons_report_contamination_total"] = pd.NA - records.df.loc[~records.df["recycling_tons_contaminated"].isnull(), "recycling_tons_report_contamination_total"] = ( - records.df["Combined_Total_of_Material_Recycled__c"] * records.df["in_state_modifier"] + #: Amount of material received derived from recovery rate + records.df["in_state_msw_received_for_recycling"] = ( + records.df["in_state_msw_recycled"] / records.df["recovery_rate"] ) - #: Invert to get uncontaminated rate + #: Uncontaminated rates by year clean_rates = records.df.groupby("Calendar_Year__c").apply( lambda year_df: ( - 1 - - ( - year_df["recycling_tons_contaminated"].sum() - / year_df["recycling_tons_report_contamination_total"].sum() - ) + year_df["in_state_msw_recycled"].sum() / year_df["in_state_msw_received_for_recycling"].sum() * 100 ) - * 100 ) + clean_rates.name = "annual_recycling_uncontaminated_rate" clean_rates.index.name = "data_year" clean_rates.index = clean_rates.index.map(helpers.convert_to_int) diff --git a/tests/test_summarize.py b/tests/test_summarize.py index 0014a1e..47598d7 100644 --- a/tests/test_summarize.py +++ b/tests/test_summarize.py @@ -82,8 +82,9 @@ def test_recovery_rates_by_tonnage_happy_path(self, mocker): { "Calendar_Year__c": [2022, 2022, 2023, 2023], "Out_of_State__c": [0, 0, 0, 0], - "Annual_Recycling_Contamination_Rate__c": [10, 0, 10, 20], - "Combined_Total_of_Material_Recycled__c": [100, 100, 100, 100], + "Municipal_Solid_Waste__c": [100, 100, 100, 100], + "Annual_Recycling_Contamination_Rate__c": [50, 0, 50, 50], + "Combined_Total_of_Material_Recycled__c": [50, 100, 50, 40], } ) @@ -91,8 +92,8 @@ def test_recovery_rates_by_tonnage_happy_path(self, mocker): test_df = pd.Series( { - 2022: 95.0, - 2023: 85.0, + 2022: 75.0, + 2023: 50.0, }, name="annual_recycling_uncontaminated_rate", ) @@ -104,11 +105,12 @@ def test_recovery_rates_by_tonnage_uses_out_of_state_modifier(self, mocker): records = mocker.Mock() records.df = pd.DataFrame( { - "facility_name": ["foo", "bar", "foo", "bar"], + # "facility_name": ["foo", "bar", "foo", "bar"], "Calendar_Year__c": [2022, 2022, 2023, 2023], "Out_of_State__c": [0, 100, 0, 100], - "Annual_Recycling_Contamination_Rate__c": [10, 0, 10, 20], - "Combined_Total_of_Material_Recycled__c": [100, 100, 100, 100], + "Municipal_Solid_Waste__c": [100, 100, 100, 100], + "Annual_Recycling_Contamination_Rate__c": [50, 0, 50, 50], + "Combined_Total_of_Material_Recycled__c": [50, 100, 50, 40], } ) @@ -116,8 +118,8 @@ def test_recovery_rates_by_tonnage_uses_out_of_state_modifier(self, mocker): test_df = pd.Series( { - 2022: 90.0, - 2023: 90.0, + 2022: 50.0, + 2023: 50.0, }, name="annual_recycling_uncontaminated_rate", )