Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🔨 migrate population explorer #4095

Draft
wants to merge 33 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
71c77be
🔨 migrate population explorer
lucasrodes Mar 10, 2025
ce33c4d
wip
lucasrodes Mar 10, 2025
fc266f6
Merge branch 'master' into refactor-migrate-population
lucasrodes Mar 10, 2025
a776e15
wip
lucasrodes Mar 10, 2025
359ad4c
enhance: explode only views of selected dimensions
lucasrodes Mar 10, 2025
7cfcce1
bug: order of dimensions and choices
lucasrodes Mar 10, 2025
194b728
simplify function
lucasrodes Mar 10, 2025
8444c1f
bring expand_config to `common`
lucasrodes Mar 10, 2025
4a77ec6
wip: population explorer
lucasrodes Mar 10, 2025
7359fb9
wip
lucasrodes Mar 11, 2025
eb64135
keep indicator as dimension argument
lucasrodes Mar 11, 2025
9562362
test
lucasrodes Mar 11, 2025
69c3997
🔨 create_mdim and create_explorer_v2 available in paths
lucasrodes Mar 11, 2025
b0afae6
wip
lucasrodes Mar 11, 2025
e3e19fc
Merge branch 'master' into refactor-mdim-explorer
lucasrodes Mar 11, 2025
2b8c54d
collection-specific parent abstraction class
lucasrodes Mar 11, 2025
97a9ac3
create_mdim and save()
lucasrodes Mar 11, 2025
f1e24f4
try create_mdim + save workflow for covid
lucasrodes Mar 11, 2025
8268bd1
upgrade mdim scripts
lucasrodes Mar 11, 2025
8482f0c
remove upsert_multidim_data_page
lucasrodes Mar 11, 2025
9238105
update example
lucasrodes Mar 11, 2025
5a0e0a0
add docstring
lucasrodes Mar 11, 2025
4523aba
remove dest_dir argument
lucasrodes Mar 11, 2025
f1853da
remove unused import
lucasrodes Mar 11, 2025
764af10
change name of function to signal it is legacy
lucasrodes Mar 11, 2025
1cdf9ce
minor fix
lucasrodes Mar 11, 2025
7956d5d
Merge branch 'refactor-mdim-explorer' into refactor-mdim-explorer-2
lucasrodes Mar 11, 2025
95a2242
remove duplicate code
lucasrodes Mar 11, 2025
a601644
Merge branch 'refactor-mdim-explorer' into refactor-mdim-explorer-2
lucasrodes Mar 11, 2025
71d255f
wip
lucasrodes Mar 11, 2025
d7cfbdd
Merge branch 'master' into refactor-mdim-explorer-2
lucasrodes Mar 11, 2025
20a695b
Merge branch 'master' into refactor-migrate-population
lucasrodes Mar 11, 2025
24c6366
Merge branch 'refactor-mdim-explorer-2' into refactor-migrate-population
lucasrodes Mar 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions dag/archive/demography.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,6 @@ steps:
- data://meadow/demography/2023-04-07/population_education_wittgenstein
data://grapher/demography/2023-04-07/population_education_wittgenstein:
- data://garden/demography/2023-04-07/population_education_wittgenstein

data://explorers/un/2022/un_wpp:
- data://garden/un/2022-07-11/un_wpp
6 changes: 3 additions & 3 deletions dag/demography.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ steps:
- data://garden/un/2024-07-12/un_wpp
data://grapher/un/2024-07-12/un_wpp_full:
- data://garden/un/2024-07-12/un_wpp
export://explorers/un/latest/un_wpp:
- data://grapher/un/2024-07-12/un_wpp

# UN WPP (2022)
data://meadow/un/2022-07-11/un_wpp:
Expand All @@ -63,8 +65,6 @@ steps:
- data://meadow/un/2022-07-11/un_wpp
data://grapher/un/2022-07-11/un_wpp:
- data://garden/un/2022-07-11/un_wpp
data://explorers/un/2022/un_wpp:
- data://garden/un/2022-07-11/un_wpp

# WPP Historical comparison
data://garden/demography/2024-07-12/un_wpp_historical:
Expand Down Expand Up @@ -343,7 +343,7 @@ steps:
- data://garden/demography/2024-12-06/wittgenstein_human_capital_proj
data://grapher/demography/2024-12-06/wittgenstein_human_capital:
- data://garden/demography/2024-12-06/wittgenstein_human_capital

#
# TODO: add step name (just something recognizable)
#
Expand Down
405 changes: 404 additions & 1 deletion etl/collections/common.py

Large diffs are not rendered by default.

111 changes: 99 additions & 12 deletions etl/collections/explorers.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,101 @@
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, List, Optional, Set

import pandas as pd

from etl.collections.common import validate_collection_config
from etl.collections.model import CHART_DIMENSIONS, Explorer
from etl.collections.common import expand_config, validate_collection_config
from etl.collections.model import CHART_DIMENSIONS, Collection, Definitions, ExplorerView, pruned_json
from etl.collections.utils import (
get_tables_by_name_mapping,
validate_indicators_in_db,
)
from etl.config import OWID_ENV, OWIDEnv
from etl.explorer import Explorer as ExplorerOld
from etl.helpers import create_explorer as create_explorer_main
from etl.helpers import create_explorer_legacy


@pruned_json
@dataclass
class Explorer(Collection):
    """Model for Explorer configuration.

    NOTE(review): `dimensions` and `indicators_in_use` are used below but not defined
    in this class; presumably they are inherited from `Collection` -- confirm.
    """

    # Views that make up the explorer.
    views: List[ExplorerView]
    # Explorer-level settings; forwarded as `config` to `create_explorer_legacy` in `save`.
    config: Dict[str, str]
    definitions: Optional[Definitions] = None

    def display_config_names(self):
        """Get display names for all dimensions and choices.

        The structure of the output is:

        {
            dimension_slug: {
                "widget_name": "...",
                "choices": {
                    choice_slug: choice_name,
                    ...
                }
            },
            ...
        }

        where `widget_name` is actually not displayed anywhere, but used as header name in explorer config.
        """
        mapping = {}
        for dim in self.dimensions:
            mapping[dim.slug] = {
                "widget_name": f"{dim.name} {dim.ui_type.title()}",
                "choices": {choice.slug: choice.name for choice in dim.choices},
            }
        return mapping

    def save(
        self,
        owid_env: Optional[OWIDEnv] = None,
        tolerate_extra_indicators: bool = False,
        *,
        dest_dir: Optional[str] = None,
    ) -> None:
        """Validate the explorer's indicators and save it through the legacy explorer writer.

        Args:
            owid_env: Environment whose `engine` is used to validate indicators.
                Defaults to `OWID_ENV` when omitted.
            tolerate_extra_indicators: Forwarded to `indicators_in_use`.
            dest_dir: Destination directory forwarded to `create_explorer_legacy`.
                Required: the legacy writer is called with `dest_dir`, and this method
                previously referenced an undefined `dest_dir` name (NameError on every call).

        Raises:
            ValueError: If `dest_dir` is not provided.
        """
        # Fail fast, before touching the database, if we cannot write the result anyway.
        if dest_dir is None:
            raise ValueError("`dest_dir` is required to save an explorer with the legacy writer.")

        # Ensure we have an environment set
        if owid_env is None:
            owid_env = OWID_ENV

        # Check that all indicators in the explorer exist
        indicators = self.indicators_in_use(tolerate_extra_indicators)
        validate_indicators_in_db(indicators, owid_env.engine)

        # TODO: Below code should be replaced at some point with DB-interaction code, as in `etl.collections.multidim.upsert_mdim_data_page`.
        # Extract Explorer view rows. NOTE: This is for compatibility with current Explorer config structure.
        df_grapher = extract_explorer_views(self)

        # Create explorer
        ds = create_explorer_legacy(
            dest_dir=dest_dir,
            config=self.config,
            df_graphers=df_grapher,
        )

        ds.save()


__all__ = ["expand_config"]


def create_explorer(
dest_dir: str,
config: dict,
dependencies: Set[str],
owid_env: Optional[OWIDEnv] = None,
tolerate_extra_indicators: bool = False,
) -> ExplorerOld:
"""TODO: Replicate `etl.collections.multidim.upsert_mdim_data_page`."""
) -> Explorer:
"""Create an explorer object."""
# Read configuration as structured data
explorer = Explorer.from_dict(config)

# Edit views
process_views(explorer, dependencies)

# Create explorer (TODO: this should rather push to DB! As in with `etl.collections.multidim.upsert_mdim_data_page`)
return _create_explorer(dest_dir, explorer, tolerate_extra_indicators, owid_env)
# Validate config
# explorer.validate_schema(SCHEMAS_DIR / "explorer-schema.json")

# Ensure that all views are in choices
explorer.validate_views_with_dimensions()

# Validate duplicate views
explorer.check_duplicate_views()

return explorer


def process_views(
Expand All @@ -55,6 +122,7 @@ def _create_explorer(
dest_dir: str,
explorer: Explorer,
tolerate_extra_indicators: bool,
explorer_name: Optional[str] = None,
owid_env: Optional[OWIDEnv] = None,
):
# Ensure we have an environment set
Expand All @@ -70,10 +138,11 @@ def _create_explorer(
df_grapher = extract_explorer_views(explorer)

# Create explorer
ds = create_explorer_main(
ds = create_explorer_legacy(
dest_dir=dest_dir,
config=explorer.config,
df_graphers=df_grapher,
explorer_name=explorer_name,
)

return ds
Expand Down Expand Up @@ -125,6 +194,24 @@ def extract_explorer_views(

# Build DataFrame with records
df_grapher = pd.DataFrame.from_records(records)

# Order views
## Order rows
for _, properties in dimensions_display.items():
column = properties["widget_name"]
choices_ordered = list(properties["choices"].values())
# Check if all DataFrame values exist in the predefined lists
if not set(df_grapher[column]).issubset(set(choices_ordered)):
raise ValueError(f"Column `{column}` contains values not present in `choices_ordered`.")

# Convert columns to categorical with the specified order
df_grapher[column] = pd.Categorical(df_grapher[column], categories=choices_ordered, ordered=True)
df_grapher = df_grapher.sort_values(by=[d["widget_name"] for _, d in dimensions_display.items()])

## Order columns
cols_widgets = [d["widget_name"] for _, d in dimensions_display.items()]
df_grapher = df_grapher[cols_widgets + [col for col in df_grapher.columns if col not in cols_widgets]]

return df_grapher


Expand Down
39 changes: 1 addition & 38 deletions etl/collections/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,10 +378,9 @@ def __post_init__(self):

@property
def ui_type(self):
    """Return the UI type of this dimension.

    Uses the type declared in `presentation` when one is set; otherwise falls
    back to `UITypes.DROPDOWN`.
    """
    if self.presentation is not None:
        return self.presentation.type
    # No presentation configured: dropdown is the default widget.
    return UITypes.DROPDOWN

@property
def choice_slugs(self):
Expand Down Expand Up @@ -469,42 +468,6 @@ def check_duplicate_views(self):
# raise ValueError(f"Duplicate indicators: {vc[vc > 1].index.tolist()}")


@pruned_json
@dataclass
class Explorer(Collection):
    """Model for Explorer configuration."""

    views: List[ExplorerView]
    config: Dict[str, str]
    definitions: Optional[Definitions] = None

    def display_config_names(self):
        """Map every dimension slug to its display names.

        Returns a dictionary shaped as:

        {
            dimension_slug: {
                "widget_name": "...",
                "choices": {
                    choice_slug: choice_name,
                    ...
                }
            },
            ...
        }

        `widget_name` is never shown to users; it is only used as the header name in the explorer config.
        """
        return {
            dimension.slug: {
                "widget_name": f"{dimension.name} {dimension.ui_type.title()}",
                "choices": {option.slug: option.name for option in dimension.choices},
            }
            for dimension in self.dimensions
        }


# def main():
# import yaml

Expand Down
Loading