From a031c7e089fb7cc80e9e1fc5ea6a3a9f74dd86a5 Mon Sep 17 00:00:00 2001
From: Adam
Date: Thu, 3 Aug 2023 19:21:09 +0200
Subject: [PATCH 1/3] Bump altair and loguru versions
---
eds_scikit/biology/viz/plot.py | 7 +++----
eds_scikit/biology/viz/wrapper.py | 6 +++---
eds_scikit/plot/age_pyramid.py | 4 ++--
eds_scikit/plot/event_sequences.py | 4 ++--
pyproject.toml | 4 ++--
tests/test_age_pyramid.py | 4 ++--
tests/test_event_sequences.py | 2 +-
7 files changed, 15 insertions(+), 16 deletions(-)
diff --git a/eds_scikit/biology/viz/plot.py b/eds_scikit/biology/viz/plot.py
index 9c001d25..971a2e0f 100644
--- a/eds_scikit/biology/viz/plot.py
+++ b/eds_scikit/biology/viz/plot.py
@@ -4,7 +4,6 @@
import altair as alt
import pandas as pd
-from altair.vegalite.v4.api import VConcatChart as AltChart
from IPython.display import display
from loguru import logger
from pretty_html_table import build_table
@@ -15,7 +14,7 @@
def plot_concepts_set(
concepts_set_name: str,
source_path: str = "Biology_summary",
-) -> Union[AltChart, pd.DataFrame]:
+) -> Union[alt.ConcatChart, pd.DataFrame]:
"""Plot and save a summary table and 2 interactive dashboards. For more details, have a look on the [visualization section][visualization]
Parameters
@@ -27,7 +26,7 @@ def plot_concepts_set(
Returns
-------
- List[AltChart, pd.DataFrame]
+ List[alt.ConcatChart, pd.DataFrame]
Altair plots describing the volumetric and the distribution properties of your biological data along with a pandas DataFrame with a statistical summary
"""
if os.path.isdir("{}/{}".format(source_path, concepts_set_name)):
@@ -80,7 +79,7 @@ def plot_concepts_set(
def _save_and_display_chart(
- chart: AltChart, source_path: str, concepts_set_name: str, chart_name: str
+ chart: alt.ConcatChart, source_path: str, concepts_set_name: str, chart_name: str
):
chart.display()
chart.save("{}/{}/{}.html".format(source_path, concepts_set_name, chart_name))
diff --git a/eds_scikit/biology/viz/wrapper.py b/eds_scikit/biology/viz/wrapper.py
index f243134c..3e7ea8f6 100644
--- a/eds_scikit/biology/viz/wrapper.py
+++ b/eds_scikit/biology/viz/wrapper.py
@@ -3,8 +3,8 @@
from shutil import rmtree
from typing import List, Tuple, Union
+import altair as alt
import pandas as pd
-from altair.vegalite.v4.api import VConcatChart as AltChart
from loguru import logger
from eds_scikit.biology.utils.process_concepts import (
@@ -32,7 +32,7 @@ def plot_biology_summary(
standard_concept_regex: dict = default_standard_concept_regex,
pd_limit_size: int = 100000,
stats_only: bool = False,
-) -> Union[AltChart, pd.DataFrame]:
+) -> Union[alt.ConcatChart, pd.DataFrame]:
"""It aggregates, plots and saves all the concepts-sets in folders.
@@ -65,7 +65,7 @@ def plot_biology_summary(
Returns
-------
- List[AltChart, pd.DataFrame]
+ List[alt.ConcatChart, pd.DataFrame]
Altair plots describing the volumetric and the distribution properties of your biological data along with a pandas DataFrame with a statistical summary
"""
diff --git a/eds_scikit/plot/age_pyramid.py b/eds_scikit/plot/age_pyramid.py
index 471171f6..cbbbd5ce 100644
--- a/eds_scikit/plot/age_pyramid.py
+++ b/eds_scikit/plot/age_pyramid.py
@@ -17,7 +17,7 @@ def plot_age_pyramid(
person: DataFrame,
datetime_ref: datetime = None,
return_array: bool = False,
-) -> Tuple[alt.Chart, Series]:
+) -> Tuple[alt.ConcatChart, Series]:
"""Plot an age pyramid from a 'person' pandas DataFrame.
Parameters
@@ -46,7 +46,7 @@ def plot_age_pyramid(
Returns
-------
- chart : alt.Chart,
+ chart : alt.ConcatChart,
If savefig set to True, returns None.
group_gender_age : Series,
diff --git a/eds_scikit/plot/event_sequences.py b/eds_scikit/plot/event_sequences.py
index 9c2b4730..113e421b 100644
--- a/eds_scikit/plot/event_sequences.py
+++ b/eds_scikit/plot/event_sequences.py
@@ -24,7 +24,7 @@ def plot_event_sequences(
bar_height: Optional[int] = 20,
title: Optional[str] = None,
seed: Optional[int] = 0,
-) -> alt.Chart:
+) -> alt.VConcatChart:
"""
Plots individual sequences from an events DataFrame. Each event must be recorded with a start date, a name and a `person_id`.
Events can be both one-time (only start date given) or longitudinal (both start and end dates).
@@ -74,7 +74,7 @@ def plot_event_sequences(
Returns
-------
- chart: alt.Chart
+ chart: alt.VConcatChart
Chart with the plotted individual event sequences.
"""
rng = np.random.RandomState(seed)
diff --git a/pyproject.toml b/pyproject.toml
index de861ddb..0bc9fc13 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,8 +37,8 @@ dependencies = [
"pandas>=1.3.0, <2.0.0",
"numpy>=1.0.0, <1.20",
"koalas>=1.8.1, <2.0.0",
- "altair>=4.2.0, <5.0.0",
- "loguru>=0.6.0, <0.7.0",
+ "altair>=5.0.0, <6.0.0",
+ "loguru==0.7.0",
"pypandoc==1.7.5",
"pyspark==2.4.3",
"pyarrow==0.17.0", #"pyarrow>=0.10, <0.17.0",
diff --git a/tests/test_age_pyramid.py b/tests/test_age_pyramid.py
index 3bc1e458..bfb77d4b 100644
--- a/tests/test_age_pyramid.py
+++ b/tests/test_age_pyramid.py
@@ -27,7 +27,7 @@
def test_plot_age_pyramid(datetime_ref):
original_person = person_with_inclusion_date.copy()
chart = plot_age_pyramid(person_with_inclusion_date, datetime_ref)
- assert isinstance(chart, alt.vegalite.v4.api.ConcatChart)
+ assert isinstance(chart, alt.ConcatChart)
# Check that the data is unchanged
assert_frame_equal(original_person, person_with_inclusion_date)
@@ -36,7 +36,7 @@ def test_plot_age_pyramid(datetime_ref):
def test_age_pyramid_output():
chart = plot_age_pyramid(data.person)
- assert isinstance(chart, alt.vegalite.v4.api.ConcatChart)
+ assert isinstance(chart, alt.ConcatChart)
group_gender_age = plot_age_pyramid(data.person, return_array=True)
assert isinstance(group_gender_age, Series)
diff --git a/tests/test_event_sequences.py b/tests/test_event_sequences.py
index fe11047a..70ed977e 100644
--- a/tests/test_event_sequences.py
+++ b/tests/test_event_sequences.py
@@ -49,4 +49,4 @@ def test_event_sequences(
same_x_axis_scale=same_x_axis_scale,
title=title,
)
- assert type(chart) == alt.vegalite.v4.api.VConcatChart
+ assert type(chart) == alt.VConcatChart
From ee832e09dc7f150e9a06cb46b37c23263383328f Mon Sep 17 00:00:00 2001
From: Adam
Date: Fri, 4 Aug 2023 17:58:55 +0200
Subject: [PATCH 2/3] Cap mkdocs version and point bibtex bib_dir to ./eds_scikit
---
mkdocs.yml | 2 +-
pyproject.toml | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/mkdocs.yml b/mkdocs.yml
index 1b14cc30..db7dfea3 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -96,7 +96,7 @@ plugins:
module_name: docs/macros
- bibtex:
#bib_file: "docs/references.bib"
- bib_dir: "./"
+ bib_dir: "./eds_scikit"
- gen-files:
scripts:
- docs/generate_reference.py
diff --git a/pyproject.toml b/pyproject.toml
index 0bc9fc13..0a5310c2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -83,6 +83,7 @@ doc = [
"jinja2==3.0.3",
"mike==1.1.2",
"nbformat==5.7.0",
+ "mkdocs<1.5.0",
"mkdocs-autorefs==0.3.1",
"mkdocs-bibtex==2.8.16",
"mkdocs-charts-plugin==0.0.8",
From 469449055359ccb33ea3f0954de0f7826b8a6430 Mon Sep 17 00:00:00 2001
From: Adam
Date: Mon, 7 Aug 2023 18:29:34 +0200
Subject: [PATCH 3/3] Fix breaking changes introduced by altair v5
---
.../Custom_entity/stats_summary.csv | 3 -
.../Custom_entity/stats_summary.html | 63 --
.../Protein_Quantitative/stats_summary.csv | 14 -
.../Protein_Quantitative/stats_summary.html | 272 ------
docs/functionalities/biology/tutorial.ipynb | 21 +-
eds_scikit/biology/viz/plot.py | 825 ++++--------------
eds_scikit/plot/age_pyramid.py | 4 +-
7 files changed, 167 insertions(+), 1035 deletions(-)
delete mode 100644 docs/functionalities/biology/Biology_summary/Custom_entity/stats_summary.csv
delete mode 100644 docs/functionalities/biology/Biology_summary/Custom_entity/stats_summary.html
delete mode 100644 docs/functionalities/biology/Biology_summary/Protein_Quantitative/stats_summary.csv
delete mode 100644 docs/functionalities/biology/Biology_summary/Protein_Quantitative/stats_summary.html
diff --git a/docs/functionalities/biology/Biology_summary/Custom_entity/stats_summary.csv b/docs/functionalities/biology/Biology_summary/Custom_entity/stats_summary.csv
deleted file mode 100644
index 7e357a17..00000000
--- a/docs/functionalities/biology/Biology_summary/Custom_entity/stats_summary.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-LOINC_concept_code,AnaBio_concept_code,LOINC_concept_name,AnaBio_concept_name,unit_source_value,count,mean,std,min,25%,50%,75%,max,MAD,max_threshold,min_threshold
-1751-7,C2102,Albumine [Masse/Volume] Sérum/Plasma - Numérique,Albumine_Sérum_Colorimétrie_g/L,g/l,650,30.971,9.399,10.449,28.504,26.629,36.839,43.274,6.75,81.949,0.0
-1751-7,G6616,Albumine [Masse/Volume] Sérum/Plasma - Numérique,Albumine_Sérum_Turbidimétrie_g/L,g/l,1356,25.287,7.81,12.934,27.492,28.8,32.822,57.656,7.074,84.334,0.0
diff --git a/docs/functionalities/biology/Biology_summary/Custom_entity/stats_summary.html b/docs/functionalities/biology/Biology_summary/Custom_entity/stats_summary.html
deleted file mode 100644
index 74fc751b..00000000
--- a/docs/functionalities/biology/Biology_summary/Custom_entity/stats_summary.html
+++ /dev/null
@@ -1,63 +0,0 @@
-
-
-
- |
- LOINC_concept_code |
- AnaBio_concept_code |
- LOINC_concept_name |
- AnaBio_concept_name |
- unit_source_value |
- count |
- mean |
- std |
- min |
- 25% |
- 50% |
- 75% |
- max |
- MAD |
- max_threshold |
- min_threshold |
-
-
-
-
- 0 |
- 1751-7 |
- C2102 |
- Albumine [Masse/Volume] Sérum/Plasma - Numérique |
- Albumine_Sérum_Colorimétrie_g/L |
- g/l |
- 650 |
- 30.971 |
- 9.399 |
- 10.449 |
- 28.504 |
- 26.629 |
- 36.839 |
- 43.274 |
- 6.75 |
- 81.949 |
- 0.0 |
-
-
- 1 |
- 1751-7 |
- G6616 |
- Albumine [Masse/Volume] Sérum/Plasma - Numérique |
- Albumine_Sérum_Turbidimétrie_g/L |
- g/l |
- 1356 |
- 25.287 |
- 7.81 |
- 12.934 |
- 27.492 |
- 28.8 |
- 32.822 |
- 57.656 |
- 7.074 |
- 84.334 |
- 0.0 |
-
-
-
diff --git a/docs/functionalities/biology/Biology_summary/Protein_Quantitative/stats_summary.csv b/docs/functionalities/biology/Biology_summary/Protein_Quantitative/stats_summary.csv
deleted file mode 100644
index f777ea8d..00000000
--- a/docs/functionalities/biology/Biology_summary/Protein_Quantitative/stats_summary.csv
+++ /dev/null
@@ -1,14 +0,0 @@
-LOINC_concept_code,AnaBio_concept_code,LOINC_concept_name,AnaBio_concept_name,unit_source_value,count,mean,std,min,25%,50%,75%,max,MAD,max_threshold,min_threshold
-2885-2,A0249,Prot SerPl-mCnc,Protéines_Sérum_g/L,g/l,6021,77.286,8.321,24.819,65.504,61.279,85.818,104.826,8.924,103.919,23.073
-2885-2,A0250,Prot SerPl-mCnc,Protéines_Sérum_Electrophorèse_g/L,g/l,1176,59.705,7.609,24.735,47.535,84.605,90.445,137.543,7.131,91.838,32.455
-2885-2,A7347,Prot SerPl-mCnc,Protéines_Plasma_g/L,g/l,12421,51.113,8.548,22.551,63.876,58.16,77.023,95.262,8.17,86.654,33.378
-2885-2,B9417,Prot SerPl-mCnc,Protéines_Sérum_Colorimétrie_g/L,g/l,601,56.906,12.196,32.205,55.82,56.61,69.69,79.671,7.919,121.822,31.16
-2885-2,C9874,Prot SerPl-mCnc,Protéines_Sérum_Electrophorèse 2_g/L,g/l,169,54.237,6.402,54.82,51.428,76.413,74.323,84.257,8.145,124.186,34.603
-2885-2,D0058,Prot SerPl-mCnc,Protéines Après dialyse_Sérum/Plasma_g/L,g/l,51,64.92,4.699,52.023,71.595,61.444,78.434,76.351,4.502,73.379,39.551
-2885-2,F2624,Prot SerPl-mCnc,Protéines Pédiatrique_Sérum/Plasma_g/L,g/l,3,58.934,11.768,45.364,40.882,54.139,59.366,84.88,11.952,77.996,5.854
-2885-2,F5122,Prot SerPl-mCnc,Protéines Duplication A7347_Plasma_g/L,g/l,213,80.395,6.134,40.129,69.549,66.73,85.024,110.905,8.824,113.764,38.456
-2888-6,A1694,Protéines [Masse/Volume] Urine - Numérique,Protéines_Urines 24h_g/L,g/l,193,2.343,4.262,0.063,0.089,0.257,1.62,52.679,0.162,1.275,0.0
-2888-6,A1695,Protéines [Masse/Volume] Urine - Numérique,Protéines_Urines_g/L,g/l,2300,0.648,1.621,0.0,0.076,0.181,0.428,35.934,0.144,0.76,0.0
-2888-6,C9990,Non Renseigné,Protéines Duplication A1695_Urines_g/L,g/l,13,0.227,0.478,0.058,0.057,0.056,0.109,1.687,0.043,0.211,0.0
-2888-6,D0064,Non Renseigné,Protéines Sonde vésicale_Urines_g/L,g/l,2,0.389,0.368,0.138,0.29,0.378,0.501,0.643,0.272,1.576,0.0
-2888-6,J7268,Protéines [Masse/Volume] Urine - Numérique,Protéines Triplication A1695_Urines_g/L,g/l,115,0.808,0.961,0.038,0.054,0.198,0.505,6.025,0.286,1.482,0.0
diff --git a/docs/functionalities/biology/Biology_summary/Protein_Quantitative/stats_summary.html b/docs/functionalities/biology/Biology_summary/Protein_Quantitative/stats_summary.html
deleted file mode 100644
index 33c40e99..00000000
--- a/docs/functionalities/biology/Biology_summary/Protein_Quantitative/stats_summary.html
+++ /dev/null
@@ -1,272 +0,0 @@
-
-
-
- |
- LOINC_concept_code |
- AnaBio_concept_code |
- LOINC_concept_name |
- AnaBio_concept_name |
- unit_source_value |
- count |
- mean |
- std |
- min |
- 25% |
- 50% |
- 75% |
- max |
- MAD |
- max_threshold |
- min_threshold |
-
-
-
-
- 0 |
- 2885-2 |
- A0249 |
- Prot SerPl-mCnc |
- Protéines_Sérum_g/L |
- g/l |
- 6021 |
- 77.286 |
- 8.321 |
- 24.819 |
- 65.504 |
- 61.279 |
- 85.818 |
- 104.826 |
- 8.924 |
- 103.919 |
- 23.073 |
-
-
- 1 |
- 2885-2 |
- A0250 |
- Prot SerPl-mCnc |
- Protéines_Sérum_Electrophorèse_g/L |
- g/l |
- 1176 |
- 59.705 |
- 7.609 |
- 24.735 |
- 47.535 |
- 84.605 |
- 90.445 |
- 137.543 |
- 7.131 |
- 91.838 |
- 32.455 |
-
-
- 2 |
- 2885-2 |
- A7347 |
- Prot SerPl-mCnc |
- Protéines_Plasma_g/L |
- g/l |
- 12421 |
- 51.113 |
- 8.548 |
- 22.551 |
- 63.876 |
- 58.16 |
- 77.023 |
- 95.262 |
- 8.17 |
- 86.654 |
- 33.378 |
-
-
- 3 |
- 2885-2 |
- B9417 |
- Prot SerPl-mCnc |
- Protéines_Sérum_Colorimétrie_g/L |
- g/l |
- 601 |
- 56.906 |
- 12.196 |
- 32.205 |
- 55.82 |
- 56.61 |
- 69.69 |
- 79.671 |
- 7.919 |
- 121.822 |
- 31.16 |
-
-
- 4 |
- 2885-2 |
- C9874 |
- Prot SerPl-mCnc |
- Protéines_Sérum_Electrophorèse 2_g/L |
- g/l |
- 169 |
- 54.237 |
- 6.402 |
- 54.82 |
- 51.428 |
- 76.413 |
- 74.323 |
- 84.257 |
- 8.145 |
- 124.186 |
- 34.603 |
-
-
- 5 |
- 2885-2 |
- D0058 |
- Prot SerPl-mCnc |
- Protéines Après dialyse_Sérum/Plasma_g/L |
- g/l |
- 51 |
- 64.92 |
- 4.699 |
- 52.023 |
- 71.595 |
- 61.444 |
- 78.434 |
- 76.351 |
- 4.502 |
- 73.379 |
- 39.551 |
-
-
- 6 |
- 2885-2 |
- F2624 |
- Prot SerPl-mCnc |
- Protéines Pédiatrique_Sérum/Plasma_g/L |
- g/l |
- 3 |
- 58.934 |
- 11.768 |
- 45.364 |
- 40.882 |
- 54.139 |
- 59.366 |
- 84.88 |
- 11.952 |
- 77.996 |
- 5.854 |
-
-
- 7 |
- 2885-2 |
- F5122 |
- Prot SerPl-mCnc |
- Protéines Duplication A7347_Plasma_g/L |
- g/l |
- 213 |
- 80.395 |
- 6.134 |
- 40.129 |
- 69.549 |
- 66.73 |
- 85.024 |
- 110.905 |
- 8.824 |
- 113.764 |
- 38.456 |
-
-
- 8 |
- 2888-6 |
- A1694 |
- Protéines [Masse/Volume] Urine - Numérique |
- Protéines_Urines 24h_g/L |
- g/l |
- 193 |
- 2.343 |
- 4.262 |
- 0.063 |
- 0.089 |
- 0.257 |
- 1.62 |
- 52.679 |
- 0.162 |
- 1.275 |
- 0.0 |
-
-
- 9 |
- 2888-6 |
- A1695 |
- Protéines [Masse/Volume] Urine - Numérique |
- Protéines_Urines_g/L |
- g/l |
- 2300 |
- 0.648 |
- 1.621 |
- 0.0 |
- 0.076 |
- 0.181 |
- 0.428 |
- 35.934 |
- 0.144 |
- 0.76 |
- 0.0 |
-
-
- 10 |
- 2888-6 |
- C9990 |
- Non Renseigné |
- Protéines Duplication A1695_Urines_g/L |
- g/l |
- 13 |
- 0.227 |
- 0.478 |
- 0.058 |
- 0.057 |
- 0.056 |
- 0.109 |
- 1.687 |
- 0.043 |
- 0.211 |
- 0.0 |
-
-
- 11 |
- 2888-6 |
- D0064 |
- Non Renseigné |
- Protéines Sonde vésicale_Urines_g/L |
- g/l |
- 2 |
- 0.389 |
- 0.368 |
- 0.138 |
- 0.29 |
- 0.378 |
- 0.501 |
- 0.643 |
- 0.272 |
- 1.576 |
- 0.0 |
-
-
- 12 |
- 2888-6 |
- J7268 |
- Protéines [Masse/Volume] Urine - Numérique |
- Protéines Triplication A1695_Urines_g/L |
- g/l |
- 115 |
- 0.808 |
- 0.961 |
- 0.038 |
- 0.054 |
- 0.198 |
- 0.505 |
- 6.025 |
- 0.286 |
- 1.482 |
- 0.0 |
-
-
-
diff --git a/docs/functionalities/biology/tutorial.ipynb b/docs/functionalities/biology/tutorial.ipynb
index bd6a87d5..5e6c0b35 100644
--- a/docs/functionalities/biology/tutorial.ipynb
+++ b/docs/functionalities/biology/tutorial.ipynb
@@ -23,7 +23,7 @@
"metadata": {},
"outputs": [],
"source": [
- "%load_ext autoreload\n",
+ "%reload_ext autoreload\n",
"%autoreload 2"
]
},
@@ -618,14 +618,6 @@
"pd.read_csv(\"./Biology_summary/Protein_Quantitative/stats_summary.csv\")"
]
},
- {
- "cell_type": "markdown",
- "id": "e6151283",
- "metadata": {},
- "source": [
- "If you prefer, a [HTML table](./Biology_summary/Protein_Quantitative/stats_summary.html) is also generated along with the CSV (same name, but with a `.html` extension"
- ]
- },
{
"cell_type": "markdown",
"id": "70ce6f91",
@@ -777,9 +769,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "scikit",
+ "display_name": "Python 3",
"language": "python",
- "name": "scikit"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -791,12 +783,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.8"
- },
- "vscode": {
- "interpreter": {
- "hash": "a251e067adbd8c45f6d9e77d22a52920634565c087038863128b0fd851122a62"
- }
+ "version": "3.8.10"
}
},
"nbformat": 4,
diff --git a/eds_scikit/biology/viz/plot.py b/eds_scikit/biology/viz/plot.py
index 971a2e0f..5b42ea24 100644
--- a/eds_scikit/biology/viz/plot.py
+++ b/eds_scikit/biology/viz/plot.py
@@ -86,7 +86,6 @@ def _save_and_display_chart(
def _save_and_display_table(table: DataFrame, source_path: str, concepts_set_name: str):
-
display(table)
html_measurement_stats = build_table(
table,
@@ -108,7 +107,6 @@ def _save_and_display_table(table: DataFrame, source_path: str, concepts_set_nam
def plot_interactive_distribution(measurement: DataFrame):
-
standard_terminologies = _get_standard_terminologies(measurement)
measurement["over_freq"] = measurement["frequency"].where(
@@ -123,7 +121,7 @@ def plot_interactive_distribution(measurement: DataFrame):
measurement["legend_outlier"] = "Outliers grouped at the thresholds"
alt.data_transformers.disable_max_rows()
- hospital_selection = alt.selection_multi(fields=["care_site_short_name"])
+ hospital_selection = alt.selection_point(fields=["care_site_short_name"])
value_selection = alt.selection_interval(bind="scales", encodings=["x"])
color_hospital = alt.condition(
hospital_selection,
@@ -135,7 +133,7 @@ def plot_interactive_distribution(measurement: DataFrame):
hospital_hist = (
alt.Chart(measurement)
- .mark_bar()
+ .mark_bar(cornerRadiusEnd=10)
.encode(
y=alt.Y(
"sum(frequency):Q",
@@ -151,7 +149,7 @@ def plot_interactive_distribution(measurement: DataFrame):
tooltip=alt.Tooltip("sum(frequency):Q", format=","),
)
.transform_filter(value_selection)
- .add_selection(hospital_selection)
+ .add_params(hospital_selection)
).properties(width=900)
# Density Chart
@@ -174,7 +172,7 @@ def plot_interactive_distribution(measurement: DataFrame):
}
],
)
- .mark_bar()
+ .mark_bar(cornerRadiusEnd=10)
.encode(
x=alt.X(
"binned_value:Q",
@@ -186,12 +184,12 @@ def plot_interactive_distribution(measurement: DataFrame):
title="Overall frequency",
),
color=alt.Color(
- "quartile:O",
+ "min(quartile):O",
scale=alt.Scale(scheme="pastel1"),
legend=None,
),
tooltip=[
- alt.Tooltip("binned_value:Q", title="Value", format=","),
+ alt.Tooltip("binned_value:Q", title="Value", format=".2f"),
alt.Tooltip("sum(inside_freq):Q", title="Frequency", format=","),
],
)
@@ -205,10 +203,11 @@ def plot_interactive_distribution(measurement: DataFrame):
TotalUnder="sum(under_freq)",
MinValue="min(binned_value)",
)
- .mark_bar(color="gray")
+ .mark_bar(color="gray", cornerRadiusEnd=10)
.encode(
opacity=alt.Opacity(
- "legend_outlier",
+ "min(legend_outlier)",
+ scale=alt.Scale(rangeMin=1),
legend=alt.Legend(orient="bottom", title=None),
)
)
@@ -277,7 +276,6 @@ def plot_interactive_distribution(measurement: DataFrame):
alt.layer(overall_underlier, overall_overlier).resolve_scale(y="shared"),
)
.resolve_scale(y="independent")
- .add_selection(value_selection)
.transform_filter(hospital_selection)
)
@@ -287,149 +285,165 @@ def plot_interactive_distribution(measurement: DataFrame):
terminologies_selection = []
overall_densities = []
width = 900 / len(standard_terminologies)
-
terminology_dist_base = (alt.Chart(measurement)).properties(
height=100, width=width
)
- terminology_density = (
- terminology_dist_base.transform_window(
- sort=[{"field": "binned_value"}],
- groupby=[
- "{}_concept_code".format(terminology)
- for terminology in standard_terminologies
- ],
- cumulative_count="sum(inside_freq)",
+ for terminology in standard_terminologies:
+ terminology_density = (
+ terminology_dist_base.transform_window(
+ sort=[{"field": "binned_value"}],
+ groupby=["{}_concept_code".format(terminology)],
+ cumulative_count="sum(inside_freq)",
+ )
+ .transform_window(
+ sort=[{"field": "cumulative_count"}],
+ groupby=["{}_concept_code".format(terminology)],
+ window=[
+ {
+ "field": "cumulative_count",
+ "op": "ntile",
+ "as": "quartile",
+ "param": 4,
+ }
+ ],
+ )
+ .mark_bar(cornerRadiusEnd=10)
+ .encode(
+ x=alt.X(
+ "binned_value:Q",
+ title="Value",
+ ),
+ y=alt.Y(
+ "sum(inside_freq):Q",
+ axis=alt.Axis(format="s"),
+ title="Frequency",
+ ),
+ color=alt.Color(
+ "min(quartile):O",
+ scale=alt.Scale(domain=[1, 2, 3, 4], scheme="pastel1"),
+ legend=alt.Legend(orient="bottom", title="Quartile"),
+ ),
+ tooltip=[
+ alt.Tooltip("binned_value:Q", title="Value", format=".2f"),
+ alt.Tooltip(
+ "sum(inside_freq):Q", title="Frequency", format=","
+ ),
+ ],
+ )
)
- .transform_window(
- sort=[{"field": "cumulative_count"}],
- groupby=[
- "{}_concept_code".format(terminology)
- for terminology in standard_terminologies
- ],
- window=[
- {
- "field": "cumulative_count",
- "op": "ntile",
- "as": "quartile",
- "param": 4,
- }
- ],
+
+ terminology_outlier_base = (
+ terminology_dist_base.transform_joinaggregate(
+ TotalMeasures="sum(frequency)",
+ TotalOver="sum(over_freq)",
+ MaxValue="max(binned_value)",
+ TotalUnder="sum(under_freq)",
+ MinValue="min(binned_value)",
+ groupby=["{}_concept_code:N".format(terminology)],
+ )
+ .mark_bar(color="grey", cornerRadiusEnd=10)
+ .encode(
+ opacity=alt.Opacity(
+ "legend_outlier",
+ scale=alt.Scale(rangeMin=1),
+ legend=alt.Legend(orient="bottom", title=None),
+ )
+ )
)
- .mark_bar()
- .encode(
+
+ terminology_overlier = terminology_outlier_base.transform_calculate(
+ Percentage="datum.TotalOver / datum.TotalMeasures"
+ ).encode(
x=alt.X(
- "binned_value:Q",
+ "MaxValue:Q",
title="Value",
),
y=alt.Y(
- "sum(inside_freq):Q",
+ "max(TotalOver):Q",
axis=alt.Axis(format="s"),
- title="Frequency",
- ),
- color=alt.Color(
- "quartile:O",
- scale=alt.Scale(domain=[1, 2, 3, 4], scheme="pastel1"),
- legend=alt.Legend(orient="bottom", title="Quartile"),
+ title="Outliers frequency",
),
tooltip=[
- alt.Tooltip("binned_value:Q", title="Value", format=","),
- alt.Tooltip("sum(inside_freq):Q", title="Frequency", format=","),
- ],
- )
- )
-
- terminology_outlier_base = (
- terminology_dist_base.transform_joinaggregate(
- TotalMeasures="sum(frequency)",
- TotalOver="sum(over_freq)",
- MaxValue="max(binned_value)",
- TotalUnder="sum(under_freq)",
- MinValue="min(binned_value)",
- groupby=[
- "{}_concept_code:N".format(terminology)
- for terminology in standard_terminologies
+ alt.Tooltip(
+ "MaxValue:Q",
+ title="Maximum threshold (computed with MAD formula)",
+ format=",",
+ ),
+ alt.Tooltip(
+ "max(TotalOver):Q",
+ title="Frequency over the maximum",
+ ),
+ alt.Tooltip(
+ "max(Percentage):Q",
+ format=".2%",
+ ),
],
)
- .mark_bar(color="grey")
- .encode(
- opacity=alt.Opacity(
- "legend_outlier",
- legend=alt.Legend(orient="bottom", title=None),
- )
- )
- )
-
- terminology_overlier = terminology_outlier_base.transform_calculate(
- Percentage="datum.TotalOver / datum.TotalMeasures"
- ).encode(
- x=alt.X(
- "MaxValue:Q",
- title="Value",
- ),
- y=alt.Y(
- "TotalOver:Q",
- axis=alt.Axis(format="s"),
- title="Outliers frequency",
- ),
- tooltip=[
- alt.Tooltip(
- "MaxValue:Q",
- title="Maximum threshold (computed with MAD formula)",
- format=",",
- ),
- alt.Tooltip(
- "TotalOver:Q",
- title="Frequency over the maximum",
- ),
- alt.Tooltip(
- "Percentage:Q",
- format=".2%",
- ),
- ],
- )
- terminology_underlier = terminology_outlier_base.transform_calculate(
- Percentage="datum.TotalUnder / datum.TotalMeasures"
- ).encode(
- x=alt.X(
- "MinValue:Q",
- title="Value",
- ),
- y=alt.Y(
- "TotalUnder:Q",
- axis=alt.Axis(format="s"),
- title="Outliers frequency",
- ),
- tooltip=[
- alt.Tooltip(
+ terminology_underlier = terminology_outlier_base.transform_calculate(
+ Percentage="datum.TotalUnder / datum.TotalMeasures"
+ ).encode(
+ x=alt.X(
"MinValue:Q",
- title="Minimum threshold (computed with MAD formula)",
- format=",",
- ),
- alt.Tooltip(
- "TotalUnder:Q",
- title="Frequency under the minimum",
+ title="Value",
),
- alt.Tooltip(
- "Percentage:Q",
- format=".2%",
+ y=alt.Y(
+ "max(TotalUnder):Q",
+ axis=alt.Axis(format="s"),
+ title="Outliers frequency",
),
- ],
- )
+ tooltip=[
+ alt.Tooltip(
+ "MinValue:Q",
+ title="Minimum threshold (computed with MAD formula)",
+ format=",",
+ ),
+ alt.Tooltip(
+ "TotalUnder:Q",
+ title="Frequency under the minimum",
+ ),
+ alt.Tooltip(
+ "Percentage:Q",
+ format=".2%",
+ ),
+ ],
+ )
- terminology_distribution_base = (
- alt.layer(
- terminology_density,
- alt.layer(terminology_underlier, terminology_overlier).resolve_scale(
- y="shared"
- ),
+ terminology_distribution_base = (
+ (
+ terminology_density
+ + (terminology_underlier + terminology_overlier).resolve_scale(
+ y="shared"
+ )
+ )
+ .transform_filter(hospital_selection)
+ .resolve_scale(y="independent")
)
- .transform_filter(hospital_selection)
- .add_selection(value_selection)
- ).resolve_scale(y="independent")
- for terminology in standard_terminologies:
- terminology_selection = alt.selection_multi(
+ terminology_distribution = (
+ (
+ terminology_distribution_base.facet(
+ row=alt.Row(
+ "{}_concept_code:N".format(terminology),
+ sort={
+ "field": "frequency",
+ "op": "sum",
+ "order": "descending",
+ },
+ )
+ )
+ )
+ .resolve_scale(y="independent")
+ .properties(
+ title=alt.TitleParams(
+ text="Distribution per {} code".format(terminology),
+ anchor="middle",
+ align="center",
+ )
+ )
+ )
+ terminologies_distribution.append(terminology_distribution)
+ terminology_selection = alt.selection_point(
fields=["{}_concept_code".format(terminology)],
)
terminologies_selection.append(terminology_selection)
@@ -443,10 +457,9 @@ def plot_interactive_distribution(measurement: DataFrame):
),
alt.value("lightgray"),
)
-
terminology_hist = (
alt.Chart(measurement)
- .mark_bar()
+ .mark_bar(cornerRadiusEnd=10)
.encode(
y=alt.Y(
"sum(frequency):Q",
@@ -465,36 +478,11 @@ def plot_interactive_distribution(measurement: DataFrame):
color=terminology_color,
tooltip=alt.Tooltip("sum(frequency):Q", format=","),
)
- .add_selection(terminology_selection)
+ .add_params(terminology_selection)
.transform_filter(value_selection)
.transform_filter(hospital_selection)
)
-
terminologies_hist.append(terminology_hist.properties(width=width))
-
- terminology_distribution = (
- (
- terminology_distribution_base.facet(
- row=alt.Row(
- "{}_concept_code:N".format(terminology),
- sort={
- "field": "frequency",
- "op": "sum",
- "order": "descending",
- },
- )
- )
- )
- .resolve_scale(y="independent")
- .properties(
- title=alt.TitleParams(
- text="Distribution per {} code".format(terminology),
- anchor="middle",
- align="center",
- )
- )
- )
- terminologies_distribution.append(terminology_distribution)
overall_densities.append(overall_density.properties(width=width))
for terminology_selection in terminologies_selection:
@@ -517,13 +505,13 @@ def plot_interactive_distribution(measurement: DataFrame):
lambda terminology_distribution_1, terminology_distribution_2: terminology_distribution_1
| terminology_distribution_2,
terminologies_distribution,
- )
+ ).transform_filter(value_selection)
overall_densities = reduce(
lambda overall_density_1, overall_density_2: alt.hconcat(
overall_density_1, overall_density_2, spacing=75
),
overall_densities,
- )
+ ).add_params(value_selection)
else:
terminologies_hist = alt.Chart().mark_text()
@@ -557,7 +545,7 @@ def plot_interactive_volumetry(
standard_terminologies = _get_standard_terminologies(measurement)
alt.data_transformers.disable_max_rows()
- hospital_selection = alt.selection_multi(fields=["care_site_short_name"])
+ hospital_selection = alt.selection_point(fields=["care_site_short_name"])
time_selection = alt.selection_interval(encodings=["x"])
color_hospital = alt.condition(
hospital_selection,
@@ -567,7 +555,7 @@ def plot_interactive_volumetry(
hospital_hist = (
alt.Chart(measurement)
- .mark_bar()
+ .mark_bar(cornerRadiusEnd=10)
.encode(
y=alt.Y(
"sum(# measurements):Q",
@@ -582,7 +570,7 @@ def plot_interactive_volumetry(
color=color_hospital,
tooltip=alt.Tooltip("sum(# measurements):Q", format=","),
)
- .add_selection(hospital_selection)
+ .add_params(hospital_selection)
.transform_filter(time_selection)
).properties(width=900, height=300)
@@ -602,7 +590,7 @@ def plot_interactive_volumetry(
title="Total number of measurements",
),
)
- .add_selection(time_selection)
+ .add_params(time_selection)
.transform_filter(hospital_selection)
).properties(width=900, height=50)
@@ -616,11 +604,11 @@ def plot_interactive_volumetry(
.transform_calculate(
Percentage="datum.Missing / (datum.TotalMeasures + datum.Missing)"
)
- .mark_bar()
+ .mark_bar(cornerRadiusEnd=10)
.encode(
y=alt.Y(
- "Percentage:Q",
- axis=alt.Axis(format="%"),
+ "min(Percentage):Q",
+ axis=alt.Axis(format=".0%"),
title="Percentage of missing values per hospital",
),
x=alt.X(
@@ -643,7 +631,7 @@ def plot_interactive_volumetry(
width = 900 / len(standard_terminologies)
for terminology in standard_terminologies:
- terminology_selection = alt.selection_multi(
+ terminology_selection = alt.selection_point(
fields=["{}_concept_code".format(terminology)]
)
terminologies_selection.append(terminology_selection)
@@ -656,7 +644,7 @@ def plot_interactive_volumetry(
terminology_hist = (
alt.Chart(measurement)
- .mark_bar()
+ .mark_bar(cornerRadiusEnd=10)
.encode(
y=alt.Y(
"sum(# measurements):Q",
@@ -675,7 +663,7 @@ def plot_interactive_volumetry(
color=terminology_color,
tooltip=alt.Tooltip("sum(# measurements):Q", format=","),
)
- .add_selection(terminology_selection)
+ .add_params(terminology_selection)
.transform_filter(hospital_selection)
.transform_filter(time_selection)
).properties(height=300, width=width)
@@ -764,7 +752,6 @@ def plot_interactive_volumetry(
def _get_standard_terminologies(measurement):
-
standard_terminologies = list(
set(
col_name.split("_concept_code")[0]
@@ -791,7 +778,6 @@ def _get_standard_terminologies(measurement):
def _filter_zeros(measurement):
-
count_cols = ["# measurements", "# missing_values"]
# Remove rows with all 0
@@ -799,492 +785,3 @@ def _filter_zeros(measurement):
measurement = measurement.dropna(how="all", subset=count_cols)
return measurement
-
-
-# def plot_interactive_distribution_with_time(
-# measurement: DataFrame,
-# ):
-
-# standard_terminologies = _get_standard_terminologies(measurement)
-
-# measurement["over_freq"] = measurement["frequency"].where(
-# measurement["over_outlier"], 0
-# )
-# measurement["under_freq"] = measurement["frequency"].where(
-# measurement["under_outlier"], 0
-# )
-# measurement["inside_freq"] = measurement["frequency"].where(
-# ~measurement["over_outlier"] & ~measurement["under_outlier"], 0
-# )
-# measurement["legend_outlier"] = "Outliers grouped at the thresholds"
-
-# delta_time = (
-# measurement["measurement_month"]
-# .astype("datetime64")
-# .dt.to_period("M")
-# .view(dtype="int64")
-# .drop_duplicates()
-# .sort_values()
-# .diff()
-# .min()
-# )
-
-# if delta_time == 1:
-# time_axis = alt.Axis(tickCount="month", labelAngle=-90, format="%b %Y")
-# elif delta_time == 3:
-# time_axis = alt.Axis(tickCount="month", labelAngle=-90, format="%YQ%q")
-# else:
-# time_axis = alt.Axis(tickCount="year", labelAngle=-90, format="%Y")
-
-# alt.data_transformers.disable_max_rows()
-# hospital_selection = alt.selection_multi(fields=["care_site_short_name"])
-# time_selection = alt.selection_interval(encodings=["x"])
-# value_selection = alt.selection_interval(bind="scales", encodings=["x"])
-# color_hospital = alt.condition(
-# hospital_selection,
-# alt.Color(
-# "care_site_short_name:N", legend=None, scale=alt.Scale(scheme="accent")
-# ),
-# alt.value("lightgray"),
-# )
-
-# time_line = (
-# alt.Chart(measurement)
-# .mark_line()
-# .encode(
-# x=alt.X(
-# "measurement_month:T",
-# title="Time",
-# axis=time_axis,
-# ),
-# y=alt.Y(
-# "sum(frequency):Q",
-# axis=alt.Axis(format="s"),
-# impute=alt.ImputeParams(value=0),
-# title="Total number of measurements",
-# ),
-# )
-# .add_selection(time_selection)
-# .transform_filter(value_selection)
-# .transform_filter(hospital_selection)
-# ).properties(width=900, height=50)
-
-# hospital_hist = (
-# alt.Chart(measurement)
-# .mark_bar()
-# .encode(
-# y=alt.Y(
-# "sum(frequency):Q",
-# axis=alt.Axis(format="s"),
-# title="Number of measurements per hospital",
-# ),
-# x=alt.X(
-# "care_site_short_name:N",
-# title="Hospital",
-# sort={"field": "frequency", "op": "sum", "order": "descending"},
-# ),
-# color=color_hospital,
-# tooltip=alt.Tooltip("sum(frequency):Q", format=","),
-# )
-# .add_selection(hospital_selection)
-# .transform_filter(value_selection)
-# .transform_filter(time_selection)
-# ).properties(width=900)
-
-# # Density Chart
-# overall_dist_base = (
-# alt.Chart(measurement, title="Overall distribution")
-# ).properties(height=100)
-
-# overall_density = (
-# overall_dist_base.transform_window(
-# sort=[{"field": "binned_value"}], cumulative_count="sum(inside_freq)"
-# )
-# .transform_window(
-# sort=[{"field": "cumulative_count"}],
-# window=[
-# {
-# "field": "cumulative_count",
-# "op": "ntile",
-# "as": "quartile",
-# "param": 4,
-# }
-# ],
-# )
-# .mark_bar()
-# .encode(
-# x=alt.X(
-# "binned_value:Q",
-# title="Value",
-# ),
-# y=alt.Y(
-# "sum(inside_freq):Q",
-# axis=alt.Axis(format="s"),
-# title="Overall frequency",
-# ),
-# color=alt.Color(
-# "quartile:O",
-# scale=alt.Scale(scheme="pastel1"),
-# legend=None,
-# ),
-# tooltip=[
-# alt.Tooltip("binned_value:Q", title="Value", format=","),
-# alt.Tooltip("sum(inside_freq):Q", title="Frequency", format=","),
-# ],
-# )
-# )
-
-# overall_outlier_base = (
-# overall_dist_base.transform_joinaggregate(
-# TotalMeasures="sum(frequency)",
-# TotalOver="sum(over_freq)",
-# MaxValue="max(binned_value)",
-# TotalUnder="sum(under_freq)",
-# MinValue="min(binned_value)",
-# )
-# .mark_bar(color="grey")
-# .encode(
-# opacity=alt.Opacity(
-# "legend_outlier",
-# legend=alt.Legend(orient="bottom", title=None),
-# )
-# )
-# )
-
-# overall_overlier = overall_outlier_base.transform_calculate(
-# Percentage="datum.TotalOver / datum.TotalMeasures"
-# ).encode(
-# x=alt.X(
-# "MaxValue:Q",
-# title="Value",
-# ),
-# y=alt.Y(
-# "TotalOver:Q",
-# axis=alt.Axis(format="s"),
-# title="Outliers frequency",
-# ),
-# tooltip=[
-# alt.Tooltip(
-# "MaxValue:Q",
-# title="Maximum threshold (computed with MAD formula)",
-# format=",",
-# ),
-# alt.Tooltip(
-# "TotalOver:Q",
-# title="Frequency over the maximum",
-# ),
-# alt.Tooltip(
-# "Percentage:Q",
-# format=".2%",
-# ),
-# ],
-# )
-# overall_underlier = overall_outlier_base.transform_calculate(
-# Percentage="datum.TotalUnder / datum.TotalMeasures"
-# ).encode(
-# x=alt.X(
-# "MinValue:Q",
-# title="Value",
-# ),
-# y=alt.Y(
-# "TotalUnder:Q",
-# axis=alt.Axis(format="s"),
-# title="Outliers frequency",
-# ),
-# tooltip=[
-# alt.Tooltip(
-# "MinValue:Q",
-# title="Minimum threshold (computed with MAD formula)",
-# format=",",
-# ),
-# alt.Tooltip(
-# "TotalUnder:Q",
-# title="Frequency under the minimum",
-# ),
-# alt.Tooltip(
-# "Percentage:Q",
-# format=".2%",
-# ),
-# ],
-# )
-
-# overall_density = (
-# alt.layer(
-# overall_density,
-# alt.layer(overall_underlier, overall_overlier).resolve_scale(y="shared"),
-# )
-# .resolve_scale(y="independent")
-# .transform_filter(hospital_selection)
-# .add_selection(value_selection)
-# )
-# if standard_terminologies:
-# terminologies_hist = []
-# terminologies_distribution = []
-# terminologies_selection = []
-# overall_densities = []
-# width = 900 / len(standard_terminologies)
-
-# terminology_dist_base = (alt.Chart(measurement)).properties(
-# height=100, width=width
-# )
-# terminology_density = (
-# terminology_dist_base.transform_window(
-# sort=[{"field": "binned_value"}],
-# groupby=[
-# "{}_concept_code".format(terminology)
-# for terminology in standard_terminologies
-# ],
-# cumulative_count="sum(inside_freq)",
-# )
-# .transform_window(
-# sort=[{"field": "cumulative_count"}],
-# groupby=[
-# "{}_concept_code".format(terminology)
-# for terminology in standard_terminologies
-# ],
-# window=[
-# {
-# "field": "cumulative_count",
-# "op": "ntile",
-# "as": "quartile",
-# "param": 4,
-# }
-# ],
-# )
-# .mark_bar()
-# .encode(
-# x=alt.X(
-# "binned_value:Q",
-# title="Value",
-# ),
-# y=alt.Y(
-# "sum(inside_freq):Q",
-# axis=alt.Axis(format="s"),
-# title="Frequency",
-# ),
-# color=alt.Color(
-# "quartile:O",
-# scale=alt.Scale(domain=[1, 2, 3, 4], scheme="pastel1"),
-# legend=alt.Legend(orient="bottom", title="Quartile"),
-# ),
-# tooltip=[
-# alt.Tooltip("binned_value:Q", title="Value", format=","),
-# alt.Tooltip("sum(inside_freq):Q", title="Frequency", format=","),
-# ],
-# )
-# )
-
-# terminology_outlier_base = (
-# terminology_dist_base.transform_joinaggregate(
-# TotalMeasures="sum(frequency)",
-# TotalOver="sum(over_freq)",
-# MaxValue="max(binned_value)",
-# TotalUnder="sum(under_freq)",
-# MinValue="min(binned_value)",
-# groupby=[
-# "{}_concept_code:N".format(terminology)
-# for terminology in standard_terminologies
-# ],
-# )
-# .mark_bar(color="gray")
-# .encode(
-# opacity=alt.Opacity(
-# "legend_outlier",
-# legend=alt.Legend(orient="bottom", title=None),
-# )
-# )
-# )
-
-# terminology_overlier = terminology_outlier_base.transform_calculate(
-# Percentage="datum.TotalOver / datum.TotalMeasures"
-# ).encode(
-# x=alt.X(
-# "MaxValue:Q",
-# title="Value",
-# ),
-# y=alt.Y(
-# "TotalOver:Q",
-# axis=alt.Axis(format="s"),
-# title="Outliers frequency",
-# ),
-# tooltip=[
-# alt.Tooltip(
-# "MaxValue:Q",
-# title="Maximum threshold (computed with MAD formula)",
-# format=",",
-# ),
-# alt.Tooltip(
-# "TotalOver:Q",
-# title="Frequency over the maximum",
-# ),
-# alt.Tooltip(
-# "Percentage:Q",
-# format=".2%",
-# ),
-# ],
-# )
-
-# terminology_underlier = terminology_outlier_base.transform_calculate(
-# Percentage="datum.TotalUnder / datum.TotalMeasures"
-# ).encode(
-# x=alt.X(
-# "MinValue:Q",
-# title="Value",
-# ),
-# y=alt.Y(
-# "TotalUnder:Q",
-# axis=alt.Axis(format="s"),
-# title="Outliers frequency",
-# ),
-# tooltip=[
-# alt.Tooltip(
-# "MinValue:Q",
-# title="Minimum threshold (computed with MAD formula)",
-# format=",",
-# ),
-# alt.Tooltip(
-# "TotalUnder:Q",
-# title="Frequency under the minimum",
-# ),
-# alt.Tooltip(
-# "Percentage:Q",
-# format=".2%",
-# ),
-# ],
-# )
-# terminology_distribution_base = (
-# alt.layer(
-# terminology_density,
-# alt.layer(terminology_underlier, terminology_overlier).resolve_scale(
-# y="shared"
-# ),
-# )
-# .transform_filter(time_selection)
-# .transform_filter(hospital_selection)
-# .add_selection(value_selection)
-# ).resolve_scale(y="independent")
-
-# for terminology in standard_terminologies:
-# terminology_selection = alt.selection_multi(
-# fields=["{}_concept_code".format(terminology)],
-# )
-# terminologies_selection.append(terminology_selection)
-
-# terminology_color = alt.condition(
-# terminology_selection,
-# alt.Color(
-# "{}_concept_code:N".format(terminology),
-# legend=None,
-# scale=alt.Scale(scheme="pastel2"),
-# ),
-# alt.value("lightgray"),
-# )
-
-# terminology_hist = (
-# alt.Chart(measurement)
-# .mark_bar()
-# .encode(
-# y=alt.Y(
-# "sum(frequency):Q",
-# axis=alt.Axis(format="s"),
-# title="Number of measurements per {} code".format(terminology),
-# ),
-# x=alt.X(
-# "{}_concept_code:N".format(terminology),
-# title="{} code".format(terminology),
-# sort={
-# "field": "frequency",
-# "op": "sum",
-# "order": "descending",
-# },
-# ),
-# color=terminology_color,
-# tooltip=alt.Tooltip("sum(frequency):Q", format=","),
-# )
-# .add_selection(terminology_selection)
-# .transform_filter(hospital_selection)
-# .transform_filter(value_selection)
-# .transform_filter(time_selection)
-# )
-
-# terminology_distribution = (
-# (
-# terminology_distribution_base.facet(
-# row=alt.Row(
-# "{}_concept_code:N".format(terminology),
-# sort={
-# "field": "frequency",
-# "op": "sum",
-# "order": "descending",
-# },
-# )
-# )
-# )
-# .resolve_scale(y="independent")
-# .properties(
-# title=alt.TitleParams(
-# text="Distribution per {} code".format(terminology),
-# anchor="middle",
-# align="center",
-# )
-# )
-# )
-
-# terminologies_hist.append(terminology_hist.properties(width=width))
-
-# terminologies_distribution.append(terminology_distribution)
-# overall_densities.append(overall_density.properties(width=width))
-
-# for terminology_selection in terminologies_selection:
-# hospital_hist = hospital_hist.transform_filter(terminology_selection)
-# time_line = time_line.transform_filter(terminology_selection)
-# for idx in range(len(standard_terminologies)):
-# if idx != terminologies_selection.index(terminology_selection):
-# terminologies_hist[idx] = terminologies_hist[idx].transform_filter(
-# terminology_selection
-# )
-# terminologies_distribution[idx] = terminologies_distribution[
-# idx
-# ].transform_filter(terminology_selection)
-
-# terminologies_hist = reduce(
-# lambda terminology_hist_1, terminology_hist_2: terminology_hist_1
-# | terminology_hist_2,
-# terminologies_hist,
-# )
-# terminologies_distribution = reduce(
-# lambda terminology_distribution_1, terminology_distribution_2: terminology_distribution_1
-# | terminology_distribution_2,
-# terminologies_distribution,
-# )
-# overall_densities = reduce(
-# lambda overall_density_1, overall_density_2: alt.hconcat(
-# overall_density_1, overall_density_2, spacing=75
-# ),
-# overall_densities,
-# )
-
-# else:
-# terminologies_hist = alt.Chart().mark_text()
-# terminologies_distribution = alt.Chart().mark_text()
-# overall_densities = (
-# overall_density.encode(
-# color=alt.Color(
-# "quartile:O",
-# scale=alt.Scale(domain=[1, 2, 3, 4], scheme="pastel1"),
-# legend=alt.Legend(orient="bottom", title="Quartile"),
-# ),
-# )
-# .transform_filter(hospital_selection)
-# .properties(width=900)
-# )
-
-# chart = (
-# hospital_hist
-# & time_line
-# & terminologies_hist
-# & overall_densities
-# & terminologies_distribution
-# ).resolve_scale(color="independent")
-
-# return chart
diff --git a/eds_scikit/plot/age_pyramid.py b/eds_scikit/plot/age_pyramid.py
index cbbbd5ce..0c5a8dd0 100644
--- a/eds_scikit/plot/age_pyramid.py
+++ b/eds_scikit/plot/age_pyramid.py
@@ -137,8 +137,8 @@ def plot_age_pyramid(
alt.Chart(male)
.mark_text()
.encode(
- y=alt.Text("age_bins", axis=None, sort=alt.SortOrder("descending")),
- text=alt.Y("age_bins"),
+ y=alt.Y("age_bins", axis=None, sort=alt.SortOrder("descending")),
+ text=alt.Text("age_bins"),
)
)