fix: improve recovery/uncontamination rate

agrc · Nov 5, 2024 · 54f71e4 · 54f71e4
1 parent 77a53b9
commit 54f71e4
Show file tree

Hide file tree

Showing 2 changed files with 27 additions and 28 deletions.
diff --git a/src/wmrc/summarize.py b/src/wmrc/summarize.py
@@ -198,8 +198,9 @@ def recovery_rates_by_tonnage(records: helpers.SalesForceRecords) -> pd.Series:
     """Calculates a yearly recovery rate based on the Salesforce records.
 
     Recovery rate is opposite of contaminated rate (5% contamination = 95% uncontaminated). Rate is
-    calculated by using the contamination rate to determine contaminated tonnage and comparing that to the total
-    tonnage handled by facilities reporting a contamination rate.
+    calculated per year from two per-facility quantities: the in-state MSW tonnage recycled and the tonnage received
+    (the recycled tonnage divided by that facility's recovery rate). The yearly rate is the summed recycled tonnage
+    divided by the summed received tonnage across all facilities, expressed as a percentage.
 
     Args:
         records (helpers.SalesForceRecords): Helper object containing the Salesforce records
@@ -208,34 +209,30 @@ def recovery_rates_by_tonnage(records: helpers.SalesForceRecords) -> pd.Series:
         pd.Series: recovery rates per year with index name data_year and series name
             "annual_recycling_uncontaminated_rate"
     """
-    #: First, create a modifier to account for material from out-of-state
+    #: Create our various modifiers
     records.df["in_state_modifier"] = (100 - records.df["Out_of_State__c"]) / 100
+    records.df["msw_modifier"] = records.df["Municipal_Solid_Waste__c"] / 100
+    records.df["recovery_rate"] = (100 - records.df["Annual_Recycling_Contamination_Rate__c"]) / 100
 
-    #: Calculate contaminated tonnage
-    records.df["recycling_tons_contaminated"] = (
-        records.df["Annual_Recycling_Contamination_Rate__c"]
-        / 100
-        * records.df["Combined_Total_of_Material_Recycled__c"]
+    #: Amount of material recycled
+    records.df["in_state_msw_recycled"] = (
+        records.df["Combined_Total_of_Material_Recycled__c"]
         * records.df["in_state_modifier"]
+        * records.df["msw_modifier"]
     )
 
-    #: Calculate total tonnage from facilities reporting a contamination rate
-    records.df["recycling_tons_report_contamination_total"] = pd.NA
-    records.df.loc[~records.df["recycling_tons_contaminated"].isnull(), "recycling_tons_report_contamination_total"] = (
-        records.df["Combined_Total_of_Material_Recycled__c"] * records.df["in_state_modifier"]
+    #: Amount of material received derived from recovery rate
+    records.df["in_state_msw_received_for_recycling"] = (
+        records.df["in_state_msw_recycled"] / records.df["recovery_rate"]
     )
 
-    #: Invert to get uncontaminated rate
+    #: Uncontaminated rates by year
     clean_rates = records.df.groupby("Calendar_Year__c").apply(
         lambda year_df: (
-            1
-            - (
-                year_df["recycling_tons_contaminated"].sum()
-                / year_df["recycling_tons_report_contamination_total"].sum()
-            )
+            year_df["in_state_msw_recycled"].sum() / year_df["in_state_msw_received_for_recycling"].sum() * 100
         )
-        * 100
     )
+
     clean_rates.name = "annual_recycling_uncontaminated_rate"
     clean_rates.index.name = "data_year"
     clean_rates.index = clean_rates.index.map(helpers.convert_to_int)

diff --git a/tests/test_summarize.py b/tests/test_summarize.py
@@ -82,17 +82,18 @@ def test_recovery_rates_by_tonnage_happy_path(self, mocker):
             {
                 "Calendar_Year__c": [2022, 2022, 2023, 2023],
                 "Out_of_State__c": [0, 0, 0, 0],
-                "Annual_Recycling_Contamination_Rate__c": [10, 0, 10, 20],
-                "Combined_Total_of_Material_Recycled__c": [100, 100, 100, 100],
+                "Municipal_Solid_Waste__c": [100, 100, 100, 100],
+                "Annual_Recycling_Contamination_Rate__c": [50, 0, 50, 50],
+                "Combined_Total_of_Material_Recycled__c": [50, 100, 50, 40],
             }
         )
 
         output_series = summarize.recovery_rates_by_tonnage(records)
 
         test_df = pd.Series(
             {
-                2022: 95.0,
-                2023: 85.0,
+                2022: 75.0,
+                2023: 50.0,
             },
             name="annual_recycling_uncontaminated_rate",
         )
@@ -104,20 +105,21 @@ def test_recovery_rates_by_tonnage_uses_out_of_state_modifier(self, mocker):
         records = mocker.Mock()
         records.df = pd.DataFrame(
             {
-                "facility_name": ["foo", "bar", "foo", "bar"],
+                # "facility_name": ["foo", "bar", "foo", "bar"],
                 "Calendar_Year__c": [2022, 2022, 2023, 2023],
                 "Out_of_State__c": [0, 100, 0, 100],
-                "Annual_Recycling_Contamination_Rate__c": [10, 0, 10, 20],
-                "Combined_Total_of_Material_Recycled__c": [100, 100, 100, 100],
+                "Municipal_Solid_Waste__c": [100, 100, 100, 100],
+                "Annual_Recycling_Contamination_Rate__c": [50, 0, 50, 50],
+                "Combined_Total_of_Material_Recycled__c": [50, 100, 50, 40],
             }
         )
 
         output_series = summarize.recovery_rates_by_tonnage(records)
 
         test_df = pd.Series(
             {
-                2022: 90.0,
-                2023: 90.0,
+                2022: 50.0,
+                2023: 50.0,
             },
             name="annual_recycling_uncontaminated_rate",
         )