globaldothealth · abhidg · Sep 8, 2024 · Sep 7, 2024 · Sep 7, 2024 · Sep 7, 2024
diff --git a/src/obr/outbreaks/__init__.py b/src/obr/outbreaks/__init__.py
@@ -4,6 +4,7 @@
 
 from ..plots import (
  get_counts,
+ get_aggregate,
  get_countries_with_status,
  get_countries_with_anyof_statuses,
  plot_epicurve,
@@ -62,6 +63,14 @@
  "button_text": "Download MPXV clades",
  },
  ),
+ (
+ "table/aggregate",
+ get_aggregate,
+ {
+ "country_col": "Location_Admin0",
+ "columns": [("Case_status", "confirmed"), ("Outcome", "death")],
+ },
+ ),
  (
  "data",
  get_countries_with_status,

diff --git a/src/obr/outbreaks/mpox-2024.html b/src/obr/outbreaks/mpox-2024.html
@@ -66,6 +66,14 @@ <h2>Summary</h2>
  </figcaption>
  </figure>
 
+ <figure>
+
+ {{{ aggregate }}}
+
+ <figcaption>
+ <strong>Table 1</strong>: Aggregate data of confirmed cases and deaths by country
+ </figcaption>
+ </figure>
  <figure>
 
  {{{ age_gender }}}

diff --git a/src/obr/plots.py b/src/obr/plots.py
@@ -36,6 +36,16 @@
 pd.options.mode.chained_assignment = None
 
 
+def get_aggregate(
+    df: pd.DataFrame, country_col: str, columns: list[tuple[str, str]]
+) -> pd.DataFrame:
+    """Get aggregate for line list
+
+    Counts, for each country, the rows of the line list that match each
+    (column, value) pair.
+
+    Parameters
+    ----------
+    df
+        Line list data
+    country_col
+        Name of the column to group by
+    columns
+        (column, value) pairs; each pair adds an output column named
+        after ``value`` holding the per-country number of rows in which
+        ``column`` equals ``value``
+
+    Returns
+    -------
+    DataFrame with ``country_col`` followed by one integer count column
+    per pair; a country with no matching rows for a pair gets 0.
+    """
+    counts = [
+        df[df[col] == value].groupby(country_col).size().rename(value)
+        for col, value in columns
+    ]
+    # Each Series becomes one row, so transpose to get one row per country;
+    # a country missing from a Series is NaN after alignment, i.e. a zero count
+    return pd.DataFrame(counts).T.fillna(0).astype(int).reset_index()
+
+
 def get_countries_with_status(
  df: pd.DataFrame,
  country_col: str,

diff --git a/src/obr/sources.py b/src/obr/sources.py
@@ -13,7 +13,7 @@
 
 
 def source_databutton(
- link: str, button_text: str, download_folder: Path = DOWNLOADS
+ _, link: str, button_text: str, download_folder: Path = DOWNLOADS
 ) -> pd.DataFrame:
  options = webdriver.FirefoxOptions()
  options.headless = True

diff --git a/src/obr/util.py b/src/obr/util.py
@@ -203,7 +203,7 @@ def build(
  case "data":
  var.update(plot[1](df, **kwargs))
  case "table":
- var[plot[0].removeprefix("table/")] = plot[1](**kwargs).to_html(
+ var[plot[0].removeprefix("table/")] = plot[1](df, **kwargs).to_html(
  index=False
  )
  case "figure":

diff --git a/tests/test_data.csv b/tests/test_data.csv
@@ -1,8 +1,8 @@
-ID,Case_status,Age,Gender,Date_onset,Date_death,Date_of_first_consult,Data_up_to,Location_District
-1,confirmed,50-55,male,2023-03-05,2023-03-09,,2023-04-04,Bata
-2,probable,40-46,female,2023-02-06,2023-02-14,,2023-04-04,Bata
-3,confirmed,20,male,2023-02-19,,2023-02-25,2023-04-04,Nsoc Nsomo
-4,confirmed,99,female,2023-01-05,2023-01-11,,2023-04-04,Nsoc Nsomo
-5,probable,65,male,2023-01-13,2023-01-19,,2023-04-04,Ebiebyin
-6,confirmed,59,female,2023-03-29,,2023-04-02,2023-04-04,Ebiebyin
-7,confirmed,0,male,2023-02-11,,2023-02-13,2023-04-04,Nsork
+ID,Country,Outcome,Case_status,Age,Gender,Date_onset,Date_death,Date_of_first_consult,Data_up_to,Location_District
+1,A,death,confirmed,50-55,male,2023-03-05,2023-03-09,,2023-04-04,Bata
+2,A,death,probable,40-46,female,2023-02-06,2023-02-14,,2023-04-04,Bata
+3,A,recovered,confirmed,20,male,2023-02-19,,2023-02-25,2023-04-04,Nsoc Nsomo
+4,B,death,confirmed,99,female,2023-01-05,2023-01-11,,2023-04-04,Nsoc Nsomo
+5,B,death,probable,65,male,2023-01-13,2023-01-19,,2023-04-04,Ebiebyin
+6,B,,confirmed,59,female,2023-03-29,,2023-04-02,2023-04-04,Ebiebyin
+7,B,,confirmed,0,male,2023-02-11,,2023-02-13,2023-04-04,Nsork
diff --git a/tests/test_plots.py b/tests/test_plots.py
@@ -5,6 +5,7 @@
 
 from obr.plots import (
  get_epicurve,
+ get_aggregate,
  get_delays,
  get_counts,
  get_age_bin_data,
@@ -17,7 +18,6 @@
 DATA = read_csv(Path(__file__).with_name("test_data.csv"), date_columns=["Data_up_to"])
 STATUS_DATA = read_csv(Path(__file__).with_name("test_status_data.csv"))
 
-
 EXPECTED_TIMESERIES_LOCATION_STATUS = """Date_onset_estimated,daily_confirmed,daily_probable,cumulative_confirmed,cumulative_probable,Location_District
 2023-02-06,0,1,0,1,Bata
 2023-03-05,1,0,1,1,Bata
@@ -44,6 +44,14 @@ def test_get_delays(column, expected_delay_series):
  assert list(get_delays(DATA, column).dt.days) == expected_delay_series
 
 
+def test_aggregate():
+    "Per-country confirmed and death counts computed from the test line list"
+    actual = get_aggregate(
+        DATA, "Country", [("Case_status", "confirmed"), ("Outcome", "death")]
+    )
+    expected = pd.DataFrame(
+        {"Country": ["A", "B"], "confirmed": [2, 3], "death": [2, 2]}
+    )
+    assert actual.equals(expected)
+
+
 def test_get_countries_with_anyof_statuses():
  assert get_countries_with_anyof_statuses(
  STATUS_DATA, "Country", ["confirmed", "suspected"]