-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
) Co-authored-by: Thomas Petit-Jean <30775613+Thomzoy@users.noreply.github.com> Co-authored-by: Matthieu Doutreligne <matt.dout@gmail.com>
- Loading branch information
1 parent
24bf047
commit 001fe9b
Showing
7 changed files
with
152 additions
and
74 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Visualizing age pyramid | ||
|
||
The age pyramid is helpful to quickly visualize the age and gender distributions in a cohort. | ||
|
||
## Load a synthetic dataset | ||
|
||
`plot_age_pyramid` uses the "person" table: | ||
|
||
```python | ||
from eds_scikit.datasets.synthetic.person import load_person | ||
|
||
df_person = load_person() | ||
df_person.head() | ||
``` | ||
|
||
```python | ||
# Out: person_id gender_source_value birth_datetime | ||
0 0 m 2010-01-01 | ||
1 1 m 1938-01-01 | ||
2 2 f 1994-01-01 | ||
3 3 m 1994-01-01 | ||
4 4 m 2004-01-01 | ||
``` | ||
|
||
## Visualize age pyramid | ||
|
||
### Basic usage | ||
|
||
By default, `plot_age_pyramid` will compute age as the difference between today and the date of birth: | ||
|
||
```python | ||
from eds_scikit.plot.age_pyramid import plot_age_pyramid | ||
|
||
plot_age_pyramid(df_person) | ||
``` | ||
|
||
 | ||
|
||
### Advanced parameters | ||
|
||
Further configuration can be provided, including: | ||
|
||
- `datetime_ref`: Choose the reference to compute the age from. | ||
It can be either a single datetime (string or datetime type), an array of datetimes | ||
(one reference for each patient), or a string naming a column of the input dataframe. | ||
- `return_array`: If set to True, return the underlying aggregated data (a pandas `Series`) instead of a chart. | ||
|
||
```python | ||
import pandas as pd | ||
from datetime import datetime | ||
from eds_scikit.plot.age_pyramid import plot_age_pyramid | ||
|
||
dates_of_first_visit = pd.Series([datetime(2020, 1, 1)] * df_person.shape[0]) | ||
plot_age_pyramid(df_person, datetime_ref=dates_of_first_visit) | ||
``` | ||
|
||
 | ||
|
||
|
||
Please check [the documentation][eds_scikit.plot.age_pyramid.plot_age_pyramid] for further details on the function's parameters. |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,64 +1,62 @@ | ||
from datetime import datetime | ||
from pathlib import Path | ||
|
||
import altair as alt | ||
import numpy as np | ||
import pandas as pd | ||
import pytest | ||
from pandas.core.series import Series | ||
from pandas.testing import assert_frame_equal | ||
|
||
from eds_scikit.datasets.synthetic.person import load_person | ||
from eds_scikit.plot.data_quality import plot_age_pyramid | ||
from eds_scikit.plot.age_pyramid import plot_age_pyramid | ||
|
||
data = load_person() | ||
|
||
person_with_inclusion_date = data.person.copy() | ||
N = len(person_with_inclusion_date) | ||
delta_days = pd.to_timedelta(np.random.randint(0, 1000, N), unit="d") | ||
|
||
@pytest.mark.parametrize( | ||
"datetime_ref", | ||
[ | ||
None, | ||
datetime(2020, 1, 1), | ||
np.full(data.person.shape[0], datetime(2020, 1, 1)), | ||
], | ||
person_with_inclusion_date["inclusion_datetime"] = ( | ||
person_with_inclusion_date["birth_datetime"] + delta_days | ||
) | ||
def test_age_pyramid_datetime_ref_format(datetime_ref): | ||
original_person = data.person.copy() | ||
|
||
chart = plot_age_pyramid( | ||
data.person, datetime_ref, savefig=False, return_vector=False | ||
) | ||
|
||
@pytest.mark.parametrize( | ||
"datetime_ref", [datetime(2020, 1, 1), "inclusion_datetime", "2020-01-01"] | ||
) | ||
def test_plot_age_pyramid(datetime_ref): | ||
original_person = person_with_inclusion_date.copy() | ||
chart = plot_age_pyramid(person_with_inclusion_date, datetime_ref) | ||
assert isinstance(chart, alt.vegalite.v4.api.ConcatChart) | ||
|
||
# Check that the data is unchanged | ||
assert_frame_equal(original_person, data.person) | ||
assert_frame_equal(original_person, person_with_inclusion_date) | ||
|
||
|
||
def test_age_pyramid_output(): | ||
|
||
filename = "test.html" | ||
plot_age_pyramid(data.person, savefig=True, filename=filename) | ||
path = Path(filename) | ||
assert path.exists() | ||
path.unlink() | ||
|
||
group_gender_age = plot_age_pyramid( | ||
data.person, savefig=True, return_vector=True, filename=filename | ||
) | ||
assert isinstance(group_gender_age, Series) | ||
|
||
chart, group_gender_age = plot_age_pyramid( | ||
data.person, savefig=False, return_vector=True | ||
) | ||
chart = plot_age_pyramid(data.person) | ||
assert isinstance(chart, alt.vegalite.v4.api.ConcatChart) | ||
|
||
group_gender_age = plot_age_pyramid(data.person, return_array=True) | ||
assert isinstance(group_gender_age, Series) | ||
|
||
chart = plot_age_pyramid(data.person, savefig=False, return_vector=False) | ||
assert isinstance(chart, alt.vegalite.v4.api.ConcatChart) | ||
|
||
with pytest.raises(ValueError, match="You have to set a filename"): | ||
_ = plot_age_pyramid(data.person, savefig=True, filename=None) | ||
def test_plot_age_pyramid_datetime_ref_error(): | ||
with pytest.raises( | ||
ValueError, | ||
match="`datetime_ref` must either be a column name or parseable date, got string '20x2-01-01'", | ||
): | ||
_ = plot_age_pyramid( | ||
person_with_inclusion_date, | ||
datetime_ref="20x2-01-01", | ||
) | ||
|
||
with pytest.raises( | ||
ValueError, match="'filename' type must be str, got <class 'list'>" | ||
TypeError, | ||
match="`datetime_ref` must be either None, a parseable string date, a column name or a datetime. Got type: <class 'int'>, 2022", | ||
): | ||
_ = plot_age_pyramid(data.person, savefig=True, filename=[1]) | ||
_ = plot_age_pyramid( | ||
person_with_inclusion_date, | ||
datetime_ref=2022, | ||
) |