Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

📊 faostat: Update agriculture data #4091

Draft
wants to merge 17 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 132 additions & 0 deletions dag/faostat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -296,3 +296,135 @@ steps:
#
data://grapher/faostat/2024-03-14/additional_variables:
- data://garden/faostat/2024-03-14/additional_variables
#
# FAOSTAT meadow steps for version 2025-03-10
#
data://meadow/faostat/2025-03-10/faostat_fbs:
- snapshot://faostat/2025-03-10/faostat_fbs.zip
data://meadow/faostat/2025-03-10/faostat_fbsh:
- snapshot://faostat/2025-03-10/faostat_fbsh.zip
data://meadow/faostat/2025-03-10/faostat_fs:
- snapshot://faostat/2025-03-10/faostat_fs.zip
data://meadow/faostat/2025-03-10/faostat_ic:
- snapshot://faostat/2025-03-10/faostat_ic.zip
data://meadow/faostat/2025-03-10/faostat_lc:
- snapshot://faostat/2025-03-10/faostat_lc.zip
data://meadow/faostat/2025-03-10/faostat_metadata:
- snapshot://faostat/2025-03-10/faostat_metadata.json
data://meadow/faostat/2025-03-10/faostat_qcl:
- snapshot://faostat/2025-03-10/faostat_qcl.zip
data://meadow/faostat/2025-03-10/faostat_qv:
- snapshot://faostat/2025-03-10/faostat_qv.zip
data://meadow/faostat/2025-03-10/faostat_rfn:
- snapshot://faostat/2025-03-10/faostat_rfn.zip
data://meadow/faostat/2025-03-10/faostat_rl:
- snapshot://faostat/2025-03-10/faostat_rl.zip
data://meadow/faostat/2025-03-10/faostat_rp:
- snapshot://faostat/2025-03-10/faostat_rp.zip
data://meadow/faostat/2025-03-10/faostat_scl:
- snapshot://faostat/2025-03-10/faostat_scl.zip
data://meadow/faostat/2025-03-10/faostat_sdgb:
- snapshot://faostat/2025-03-10/faostat_sdgb.zip
#
# FAOSTAT garden steps for version 2025-03-10
#
data://garden/faostat/2025-03-10/faostat_fbsc:
- data://garden/demography/2024-07-15/population
- data://meadow/faostat/2025-03-10/faostat_fbs
- data://garden/wb/2024-07-29/income_groups
- data://meadow/faostat/2025-03-10/faostat_fbsh
- data://garden/regions/2023-01-01/regions
- data://garden/faostat/2025-03-10/faostat_metadata
data://garden/faostat/2025-03-10/faostat_food_explorer:
- data://garden/demography/2024-07-15/population
- data://garden/faostat/2025-03-10/faostat_qcl
- data://garden/faostat/2025-03-10/faostat_fbsc
- data://garden/wb/2024-07-29/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/faostat/2025-03-10/faostat_metadata
data://garden/faostat/2025-03-10/faostat_fs:
- data://garden/demography/2024-07-15/population
- data://meadow/faostat/2025-03-10/faostat_fs
- data://garden/wb/2024-07-29/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/faostat/2025-03-10/faostat_metadata
data://garden/faostat/2025-03-10/faostat_ic:
- data://meadow/faostat/2025-03-10/faostat_ic
- data://garden/demography/2024-07-15/population
- data://garden/wb/2024-07-29/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/faostat/2025-03-10/faostat_metadata
data://garden/faostat/2025-03-10/faostat_lc:
- data://garden/demography/2024-07-15/population
- data://meadow/faostat/2025-03-10/faostat_lc
- data://garden/wb/2024-07-29/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/faostat/2025-03-10/faostat_metadata
data://garden/faostat/2025-03-10/faostat_metadata:
- data://meadow/faostat/2024-03-14/faostat_cahd
- data://meadow/faostat/2025-03-10/faostat_rl
- data://meadow/faostat/2025-03-10/faostat_lc
- data://meadow/faostat/2024-03-14/faostat_tcl
- data://meadow/faostat/2025-03-10/faostat_fbsh
- data://meadow/faostat/2025-03-10/faostat_metadata
- data://meadow/faostat/2025-03-10/faostat_rp
- data://meadow/faostat/2024-03-14/faostat_ti
- data://meadow/faostat/2025-03-10/faostat_sdgb
- data://meadow/faostat/2025-03-10/faostat_qv
- data://meadow/faostat/2025-03-10/faostat_scl
- data://meadow/faostat/2025-03-10/faostat_ic
- data://meadow/faostat/2024-03-14/faostat_rfb
- data://meadow/faostat/2024-03-14/faostat_esb
- data://meadow/faostat/2025-03-10/faostat_fs
- data://meadow/faostat/2024-03-14/faostat_emn
- data://meadow/faostat/2024-03-14/faostat_ek
- data://meadow/faostat/2024-03-14/faostat_fa
- data://meadow/faostat/2024-03-14/faostat_ei
- data://meadow/faostat/2024-03-14/faostat_fo
- data://meadow/faostat/2025-03-10/faostat_rfn
- data://meadow/faostat/2024-03-14/faostat_qi
- data://meadow/faostat/2025-03-10/faostat_fbs
- data://meadow/faostat/2025-03-10/faostat_qcl
- data://meadow/faostat/2024-03-14/faostat_rt
data://garden/faostat/2025-03-10/faostat_qcl:
- data://garden/demography/2024-07-15/population
- data://garden/wb/2024-07-29/income_groups
- data://meadow/faostat/2025-03-10/faostat_qcl
- data://garden/regions/2023-01-01/regions
- data://garden/faostat/2025-03-10/faostat_metadata
data://garden/faostat/2025-03-10/faostat_qv:
- data://garden/demography/2024-07-15/population
- data://meadow/faostat/2025-03-10/faostat_qv
- data://garden/wb/2024-07-29/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/faostat/2025-03-10/faostat_metadata
data://garden/faostat/2025-03-10/faostat_rfn:
- data://garden/demography/2024-07-15/population
- data://meadow/faostat/2025-03-10/faostat_rfn
- data://garden/wb/2024-07-29/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/faostat/2025-03-10/faostat_metadata
data://garden/faostat/2025-03-10/faostat_rl:
- data://garden/demography/2024-07-15/population
- data://meadow/faostat/2025-03-10/faostat_rl
- data://garden/wb/2024-07-29/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/faostat/2025-03-10/faostat_metadata
data://garden/faostat/2025-03-10/faostat_rp:
- data://meadow/faostat/2025-03-10/faostat_rp
- data://garden/demography/2024-07-15/population
- data://garden/wb/2024-07-29/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/faostat/2025-03-10/faostat_metadata
data://garden/faostat/2025-03-10/faostat_scl:
- data://garden/demography/2024-07-15/population
- data://garden/wb/2024-07-29/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/faostat/2025-03-10/faostat_metadata
- data://meadow/faostat/2025-03-10/faostat_scl
data://garden/faostat/2025-03-10/faostat_sdgb:
- data://garden/demography/2024-07-15/population
- data://meadow/faostat/2025-03-10/faostat_sdgb
- data://garden/wb/2024-07-29/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/faostat/2025-03-10/faostat_metadata
5 changes: 3 additions & 2 deletions etl/scripts/faostat/create_new_snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def to_snapshot(self) -> None:
def load_faostat_catalog() -> List[Dict[str, Any]]:
# Some of the texts returned have special characters, so the raw response bytes are decoded explicitly
# (this used to be done as CP-1252; the catalog is now decoded as UTF-8).
# datasets = requests.get(FAO_CATALOG_URL).json()["Datasets"]["Dataset"]
datasets = json.loads(requests.get(FAO_CATALOG_URL).content.decode("cp1252"))["Datasets"]["Dataset"]
datasets = json.loads(requests.get(FAO_CATALOG_URL).content.decode("utf-8"))["Datasets"]["Dataset"]
return datasets


Expand All @@ -185,6 +185,7 @@ def is_dataset_already_up_to_date(
"""
dataset_up_to_date = False
for snapshot in existing_snapshots:
# NOTE: Handling both `source` and `origin` metadata is still necessary (in the current implementation) to be able
# to handle old snapshots.
assert snapshot.metadata.source or snapshot.metadata.origin
if snapshot.metadata.source:
snapshot_source_data_url = snapshot.metadata.source.source_data_url
Expand All @@ -194,7 +195,7 @@ def is_dataset_already_up_to_date(
snapshot_date_accessed = parser.parse(str(snapshot.metadata.origin.date_accessed)).date()
else:
raise ValueError(f"Snapshot {snapshot.metadata.short_name} does not have source or origin.")
if (snapshot_source_data_url == source_data_url) and (snapshot_date_accessed > source_modification_date):
if (snapshot_source_data_url == source_data_url) and (snapshot_date_accessed >= source_modification_date):
dataset_up_to_date = True

return dataset_up_to_date
Expand Down
59 changes: 31 additions & 28 deletions etl/scripts/faostat/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,24 +37,10 @@
# Codes of FAOSTAT domains to download from FAO and upload to walden bucket.
# This is the list that will determine the datasets (faostat_*) to be created in all further etl data steps.
INCLUDED_DATASETS_CODES = [
# Cost and Affordability of a Healthy Diet.
"cahd",
# Climate Change: Emissions intensities.
"ei",
# Land, Inputs and Sustainability: Livestock Patterns.
"ek",
# Land, Inputs and Sustainability: Livestock Manure.
"emn",
# Land, Inputs and Sustainability: Soil nutrient budget.
"esb",
# Discontinued archives and data series: Food Aid Shipments (WFP).
"fa",
# Food Balances: Food Balances (2010-).
"fbs",
# Food Balances: Food Balances (-2013, old methodology and population).
"fbsh",
# Forestry: Forestry Production and Trade.
"fo",
# Food Security and Nutrition: Suite of Food Security Indicators.
"fs",
# Credit to Agriculture.
Expand All @@ -63,33 +49,47 @@
"lc",
# Production: Crops and livestock products.
"qcl",
# Production: Production Indices.
"qi",
# Production: Value of Agricultural Production.
"qv",
# Land, Inputs and Sustainability: Fertilizers by Product.
"rfb",
# Land, Inputs and Sustainability: Fertilizers by Nutrient.
"rfn",
# Land, Inputs and Sustainability: Land Use.
"rl",
# Land, Inputs and Sustainability: Pesticides Use.
"rp",
# Land, Inputs and Sustainability: Pesticides Trade.
"rt",
# Food Balances: Supply Utilization Accounts.
"scl",
# SDG Indicators: SDG Indicators.
"sdgb",
# Removed from the list (as they have not been used and were causing issues).
# Cost and Affordability of a Healthy Diet.
# "cahd",
# Land, Inputs and Sustainability: Livestock Patterns.
# "ek",
# Climate Change: Emissions intensities.
# "ei",
# Land, Inputs and Sustainability: Livestock Manure.
# "emn",
# Land, Inputs and Sustainability: Soil nutrient budget.
# "esb",
# Discontinued archives and data series: Food Aid Shipments (WFP).
# "fa",
# Forestry: Forestry Production and Trade.
# "fo",
# Energy use.
# "gn",
# Production: Production Indices.
# "qi",
# Land, Inputs and Sustainability: Fertilizers by Product.
# "rfb",
# Land, Inputs and Sustainability: Pesticides Trade.
# "rt",
# Trade: Crops and livestock products.
"tcl",
# "tcl",
# Trade: Trade Indices.
"ti",
# Removed from the list (as they have not been used and were causing issues).
# "ti",
# World Census of Agriculture.
# "wcad",
# Energy use.
# "gn",
# The following domains used to exist in FAOSTAT, but they have been removed.
# Land, Inputs and Sustainability: Fertilizers indicators.
# "ef",
Expand All @@ -103,7 +103,9 @@
# latest update.
FAO_CATALOG_URL = "http://fenixservices.fao.org/faostat/static/bulkdownloads/datasets_E.json"
# Base URL of API, used to download metadata (about countries, elements, items, etc.).
API_BASE_URL = "https://fenixservices.fao.org/faostat/api/v1/en/definitions/domain"
# NOTE: This endpoint no longer seems to work on the fenixservices host, but it does work on faostatservices.
# FAO may be transitioning towards the latter (note that FAO_CATALOG_URL still points at fenixservices).
API_BASE_URL = "https://faostatservices.fao.org/api/v1/en/definitions/domain"
# Name of additional metadata step file (without extension).
ADDITIONAL_METADATA_FILE_NAME = f"{NAMESPACE}_metadata"
# Path to dag file for FAOSTAT steps.
Expand All @@ -119,8 +121,9 @@
"meadow": [],
"garden": [
(NAMESPACE, "garden", f"{NAMESPACE}_metadata"),
("owid", "garden", "key_indicators"),
("wb", "garden", "wb_income"),
("demography", "garden", "population"),
("regions", "garden", "regions"),
("wb", "garden", "income_groups"),
],
"grapher": [],
}
Expand Down
Loading