Merge branch 'master' into wizard-anomalist

owid · Oct 16, 2024 · aab0dc2 · aab0dc2
2 parents 786f81a + 486bb78
commit aab0dc2
Show file tree

Hide file tree

Showing 12 changed files with 82 additions and 32 deletions.
diff --git a/etl/steps/data/garden/oecd/2024-08-21/official_development_assistance.meta.yml b/etl/steps/data/garden/oecd/2024-08-21/official_development_assistance.meta.yml
diff --git a/etl/steps/data/garden/oecd/2024-08-21/official_development_assistance.py b/etl/steps/data/garden/oecd/2024-08-21/official_development_assistance.py
@@ -210,6 +210,8 @@ def run(dest_dir: str) -> None:
 
     tb_dac1 = remove_jumps_in_the_data_and_unneeded_cols(tb=tb_dac1)
 
+    tb_dac1 = limit_grant_equivalents_from_2018_only(tb=tb_dac1)
+
     tb = add_donor_data_from_recipient_dataset(tb_donor=tb_dac1, tb_recipient=tb_dac2a)
 
     tb = add_recipient_dataset(tb=tb, tb_recipient=tb_dac2a)
@@ -525,7 +527,7 @@ def remove_jumps_in_the_data_and_unneeded_cols(tb: Table) -> Table:
     This is most likely because of aggregations of population and GNI not properly done by the source.
     This is a temporary solution until the source fixes the data. It is already reported.
 
-    # Also, remove redundant columns.
+    Also, remove redundant columns.
     """
 
     # For i_oda_net_disbursements_share_gni
@@ -545,3 +547,18 @@ def remove_jumps_in_the_data_and_unneeded_cols(tb: Table) -> Table:
     )
 
     return tb
+
+
+def limit_grant_equivalents_from_2018_only(tb: Table) -> Table:
+    """
+    Return a copy of `tb` where every column whose name contains "grant_equivalents" is set to None for rows with year before 2018.
+    """
+
+    tb = tb.copy()
+
+    # Select the grant equivalent indicators: every column whose name contains the substring "grant_equivalents"
+    grant_equivalent_indicators = [col for col in tb.columns if "grant_equivalents" in col]
+    # Only rows with year < 2018 are overwritten; rows from 2018 onwards and all other columns are left as they are
+    tb.loc[tb["year"] < 2018, grant_equivalent_indicators] = None
+
+    return tb
diff --git a/etl/steps/data/garden/wb/2024-10-07/shared.py b/etl/steps/data/garden/wb/2024-10-07/shared.py
@@ -9,6 +9,14 @@
 
 non_market_income_description = "Non-market sources of income, including food grown by subsistence farmers for their own consumption, are taken into account."
 
+nowcast_regions_description = "Regional and global estimates are extrapolated up until the year of the data release using GDP growth estimates and forecasts. For more details about the methodology, please refer to the [World Bank PIP documentation](https://datanalytics.worldbank.org/PIP-Methodology/lineupestimates.html#nowcasts)."
+
+relative_poverty_description = "This is a measure of _relative_ poverty – it captures the share of people whose income is low by the standards typical in their own country."
+
+ppp_description = "The data is measured in international-$ at {ppp} prices – this adjusts for inflation and for differences in the cost of living between countries."
+
+prosperity_gap_description = "The _prosperity gap_ indicator measures how much income would need to be multiplied to ensure everyone reaches a standard of prosperity, which is defined as $25 per person per day. This gives a sense of how far global incomes are from a basic prosperity standard."
+
 processing_description_relative_poverty = "Measures of relative poverty are not directly available in the World Bank PIP data. To calculate this metric we take the median income or consumption for the country and year, calculate a relative poverty line – in this case {povline} of the median – and then run a specific query on the PIP API to return the share of population below that line."
 
 processing_description_thr = "Income and consumption thresholds by decile are not directly available in the World Bank PIP API. We extract the metric primarily from [auxiliary percentiles data provided by the World Bank](https://datacatalog.worldbank.org/search/dataset/0063646). Missing country values and regional aggregations of the indicator are calculated by running multiple queries on the API to obtain the closest poverty line to each threshold."
@@ -19,10 +27,6 @@
 
 processing_description_avg = "Income and consumption averages by decile are not directly available in the World Bank PIP API. We calculate the metric by multiplying the share of each decile by the mean income or consumption of the distribution and dividing by the population share of the decile (10%)."
 
-relative_poverty_description = "This is a measure of _relative_ poverty – it captures the share of people whose income is low by the standards typical in their own country."
-
-ppp_description = "The data is measured in international-$ at {ppp} prices – this adjusts for inflation and for differences in the cost of living between countries."
-
 
 # Define default tolerance for each variable
 TOLERANCE = 5
@@ -102,7 +106,7 @@
     },
     "pg": {
         "title": "Prosperity gap",
-        "description": "Average factor by which incomes need to be multiplied to bring everyone in the world to the prosperity standard of $25 per person per day.",
+        "description": "Average factor by which incomes need to be multiplied to bring everyone to the prosperity standard of $25 per person per day.",
         "unit": "",
         "short_unit": "",
         "numDecimalPlaces": 1,
@@ -581,12 +585,37 @@ def var_metadata_inequality_mean_median(var, origins, welfare_type) -> VariableM
                 ppp_description,
                 inc_cons_dict[welfare_type]["description"],
                 non_market_income_description,
+                nowcast_regions_description,
+            ],
+            description_processing=f"""{inc_cons_dict[welfare_type]['processing_description']}""",
+            unit=var_dict[var]["unit"],
+            short_unit=var_dict[var]["short_unit"],
+            origins=origins,
+        )
+        meta.display = {
+            "name": meta.title,
+            "numDecimalPlaces": var_dict[var]["numDecimalPlaces"],
+            "tolerance": TOLERANCE,
+        }
+
+        meta.presentation = VariablePresentationMeta(title_public=meta.title)
+
+    elif var in ["top1_thr", "top1_avg", "spl", "spr"]:
+        meta = VariableMeta(
+            title=f"{var_dict[var]['title']}",
+            description_short=var_dict[var]["description"],
+            description_key=[
+                ppp_description,
+                inc_cons_dict[welfare_type]["description"],
+                non_market_income_description,
+                nowcast_regions_description,
             ],
             description_processing=f"""{inc_cons_dict[welfare_type]['processing_description']}""",
             unit=var_dict[var]["unit"],
             short_unit=var_dict[var]["short_unit"],
             origins=origins,
         )
+
         meta.display = {
             "name": meta.title,
             "numDecimalPlaces": var_dict[var]["numDecimalPlaces"],
@@ -595,14 +624,16 @@ def var_metadata_inequality_mean_median(var, origins, welfare_type) -> VariableM
 
         meta.presentation = VariablePresentationMeta(title_public=meta.title)
 
-    elif var in ["top1_thr", "top1_avg", "spl", "spr", "pg"]:
+    elif var in ["pg"]:
         meta = VariableMeta(
             title=f"{var_dict[var]['title']}",
             description_short=var_dict[var]["description"],
             description_key=[
+                prosperity_gap_description,
                 ppp_description,
                 inc_cons_dict[welfare_type]["description"],
                 non_market_income_description,
+                nowcast_regions_description,
             ],
             description_processing=f"""{inc_cons_dict[welfare_type]['processing_description']}""",
             unit=var_dict[var]["unit"],
@@ -676,6 +707,7 @@ def var_metadata_absolute_povlines(var, povline, origins, ppp_version, welfare_t
         ppp_description,
         inc_cons_dict[welfare_type]["description"],
         non_market_income_description,
+        nowcast_regions_description,
     ]
 
     # Remove empty strings from the list
@@ -713,6 +745,7 @@ def var_metadata_between_absolute_povlines(var, povline1, povline2, origins, ppp
             ppp_description,
             inc_cons_dict[welfare_type]["description"],
             non_market_income_description,
+            nowcast_regions_description,
         ],
         description_processing=f"""{inc_cons_dict[welfare_type]['processing_description']}""",
         unit=var_dict[var]["unit"].replace("{ppp}", str(ppp_version)),
@@ -751,6 +784,7 @@ def var_metadata_relative_povlines(var, rel, origins, welfare_type) -> VariableM
             relative_poverty_description,
             inc_cons_dict[welfare_type]["description"],
             non_market_income_description,
+            nowcast_regions_description,
         ],
         description_processing=f"""{processing_description_relative_poverty}
 
@@ -783,6 +817,7 @@ def var_metadata_percentiles(var, pct, origins, ppp_version, welfare_type) -> Va
                 ppp_description,
                 inc_cons_dict[welfare_type]["description"],
                 non_market_income_description,
+                nowcast_regions_description,
             ],
             description_processing=f"""{processing_description_thr}
 

diff --git a/etl/steps/data/garden/wb/2024-10-07/world_bank_pip.meta.yml b/etl/steps/data/garden/wb/2024-10-07/world_bank_pip.meta.yml
@@ -527,7 +527,5 @@ tables:
         description_key:
           - The regions employed by the World Bank PIP differ from the regional groupings used by the World Bank in other contexts.
           - Some economies, mostly high-income economies, are excluded from the geographical regions and are included as a separate group referred to as “other high income” (or “industrialized economies” or “rest of the world” in earlier publications).
-        display:
-          tolerance: 40
         presentation:
           title_public: World regions according to World Bank Poverty and Inequality Platform
diff --git a/snapshots/climate/latest/weekly_wildfires.csv.dvc b/snapshots/climate/latest/weekly_wildfires.csv.dvc
@@ -9,8 +9,8 @@ meta:
     citation_full: Global Wildfire Information System
     attribution_short: GWIS
     url_main: https://gwis.jrc.ec.europa.eu/apps/gwis.statistics/seasonaltrend
-    date_accessed: 2024-10-15
-    date_published: 2024-10-15
+    date_accessed: 2024-10-16
+    date_published: 2024-10-16
     license:
       name: CC BY 4.0
       url: https://gwis.jrc.ec.europa.eu/about-gwis/data-license

diff --git a/snapshots/covid/latest/cases_deaths.csv.dvc b/snapshots/covid/latest/cases_deaths.csv.dvc
@@ -22,7 +22,7 @@ meta:
     version_producer: WHO COVID-19 Dashboard - Daily cases and deaths
     url_main: https://covid19.who.int/
     url_download: https://covid19.who.int/WHO-COVID-19-global-data.csv
-    date_accessed: 2024-10-15
+    date_accessed: 2024-10-16
     date_published: '2024-07-07'
     license:
       name: CC BY 4.0

diff --git a/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc b/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc
@@ -13,7 +13,7 @@ meta:
       HMD provides an online STMF visualization toolkit (https://mpidr.shinyapps.io/stmortality).
     url: https://www.mortality.org/Data/STMF
     source_data_url: https://www.mortality.org/File/GetDocument/Public/STMF/Outputs/stmf.csv
-    date_accessed: 2024-10-15
+    date_accessed: 2024-10-16
     publication_date: 2024-09-30
     publication_year: 2024
     published_by: |-

diff --git a/snapshots/excess_mortality/latest/wmd.csv.dvc b/snapshots/excess_mortality/latest/wmd.csv.dvc
@@ -13,7 +13,7 @@ meta:
       Published paper available at https://elifesciences.org/articles/69336.
     url: https://github.com/akarlinsky/world_mortality/
     source_data_url: https://raw.githubusercontent.com/akarlinsky/world_mortality/main/world_mortality.csv
-    date_accessed: 2024-10-15
+    date_accessed: 2024-10-16
     publication_date: '2021-06-30'
     publication_year: 2021
     published_by: |-

diff --git a/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc b/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc
@@ -7,7 +7,7 @@ meta:
       For more details, refer to https://github.com/dkobak/excess-mortality#excess-mortality-during-the-covid-19-pandemic.
     url: https://github.com/dkobak/excess-mortality
     source_data_url: https://raw.githubusercontent.com/dkobak/excess-mortality/main/baselines-per-year.csv
-    date_accessed: 2024-10-15
+    date_accessed: 2024-10-16
     publication_date: '2021-06-30'
     publication_year: 2021
     published_by: |-

diff --git a/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc b/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc
@@ -6,7 +6,7 @@ meta:
       For more details, refer to https://github.com/dkobak/excess-mortality#excess-mortality-during-the-covid-19-pandemic.
     url: https://github.com/dkobak/excess-mortality
     source_data_url: https://raw.githubusercontent.com/dkobak/excess-mortality/main/baselines-stmf.csv
-    date_accessed: 2024-10-15
+    date_accessed: 2024-10-16
     publication_date: '2021-06-30'
     publication_year: 2021
     published_by: |-

diff --git a/snapshots/who/latest/fluid.csv.dvc b/snapshots/who/latest/fluid.csv.dvc
@@ -16,6 +16,6 @@ meta:
     The platform accommodates both qualitative and quantitative data which facilitates the tracking of global trends, spread, intensity, and impact of influenza. These data are made freely available to health policy makers in order to assist them in making informed decisions regarding the management of influenza.
 wdir: ../../../data/snapshots/who/latest
 outs:
-  - md5: abc6873780d866491328545f6dd3b4e7
-    size: 165789281
+  - md5: aa91ea6a8ed33a292919a124f46cfd67
+    size: 165814784
     path: fluid.csv
diff --git a/snapshots/who/latest/flunet.csv.dvc b/snapshots/who/latest/flunet.csv.dvc
@@ -16,6 +16,6 @@ meta:
     The data are provided remotely by National Influenza Centres (NICs) of the Global Influenza Surveillance and Response System (GISRS) and other national influenza reference laboratories collaborating actively with GISRS, or are uploaded from WHO regional databases.
 wdir: ../../../data/snapshots/who/latest
 outs:
-  - md5: 11feb754fc65574996285d4481a0e7e5
-    size: 26890129
+  - md5: 5f93578131a7ac494148a52f4fedfe4f
+    size: 26895005
     path: flunet.csv