diff --git a/changelog.md b/changelog.md
index 581e6fbe..eb0b36a3 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## Pending
+
+### Added
+
+- Adding person-dependent `datetime_ref` to `plot_age_pyramid`
+
 ## v0.1.2 (2022-12-05)
 
 ### Added
diff --git a/eds_scikit/plot/data_quality.py b/eds_scikit/plot/data_quality.py
index 9ae9ae1c..8c6bc613 100644
--- a/eds_scikit/plot/data_quality.py
+++ b/eds_scikit/plot/data_quality.py
@@ -1,11 +1,12 @@
+from copy import copy
 from datetime import datetime
-from typing import Tuple
+from typing import Tuple, Union
 
 import altair as alt
 import numpy as np
 import pandas as pd
+from pandas.api.types import is_integer_dtype
 from pandas.core.frame import DataFrame
-from pandas.core.series import Series
 
 from ..utils.checks import check_columns
 from ..utils.framework import bd
@@ -13,11 +14,11 @@
 
 def plot_age_pyramid(
     person: DataFrame,
-    datetime_ref: datetime = None,
+    datetime_ref: Union[datetime, str] = None,
     filename: str = None,
     savefig: bool = False,
     return_vector: bool = False,
-) -> Tuple[alt.Chart, Series]:
+) -> Tuple[alt.Chart, DataFrame]:
     """Plot an age pyramid from a 'person' pandas DataFrame.
 
     Parameters
@@ -28,8 +29,10 @@ def plot_age_pyramid(
         - `person_id`, dtype : any
         - `gender_source_value`, dtype : str, {'m', 'f'}
 
-    datetime_ref : datetime,
+    datetime_ref : Union[datetime, str],
         The reference date to compute population age from.
+        If a string, it searches for a column with the same name in the person table: each patient has his own datetime reference.
+        If a datetime, the reference datetime is the same for all patients.
         If set to None, datetime.today() will be used instead.
 
     savefig : bool,
@@ -54,41 +57,63 @@ def plot_age_pyramid(
             raise ValueError("You have to set a filename")
         if not isinstance(filename, str):
             raise ValueError(f"'filename' type must be str, got {type(filename)}")
-
-    person_ = person.copy()
+    datetime_ref_original = copy(datetime_ref)
 
     if datetime_ref is None:
-        today = datetime.today()
+        datetime_ref = datetime.today()
+    elif isinstance(datetime_ref, datetime):
+        datetime_ref = pd.to_datetime(datetime_ref)
+    elif isinstance(datetime_ref, str):
+        if datetime_ref in person.columns:
+            datetime_ref = person[datetime_ref]
+        else:
+            datetime_ref = pd.to_datetime(
+                datetime_ref, errors="coerce"
+            )  # In case of error, will return NaT
+            if pd.isnull(datetime_ref):
+                raise ValueError(
+                    f"`datetime_ref` must either be a column name or parseable date, "
+                    f"got string '{datetime_ref_original}'"
+                )
     else:
-        today = pd.to_datetime(datetime_ref)
+        raise TypeError(
+            f"`datetime_ref` must be either None, a parseable string date"
+            f", a column name or a datetime. Got type: {type(datetime_ref)}, {datetime_ref}"
+        )
 
-    # TODO: replace with from ..utils.datetime_helpers.substract_datetime
-    deltas = today - person_["birth_datetime"]
-    if bd.is_pandas(person_):
-        deltas = deltas.dt.total_seconds()
+    person = person.loc[person["gender_source_value"].isin(["m", "f"])]
 
-    person_["age"] = deltas / (365 * 24 * 3600)
-    person_ = person_.query("age > 0.0")
+    deltas = datetime_ref - person["birth_datetime"]
+    if not is_integer_dtype(deltas):
+        deltas = deltas.dt.total_seconds()
+    person["age"] = deltas / (365 * 24 * 3600)  # seconds -> years (365-day years)
 
     bins = np.arange(0, 100, 10)
     labels = [f"{left}-{right}" for left, right in zip(bins[:-1], bins[1:])]
-    person_["age_bins"] = bd.cut(person_["age"], bins=bins, labels=labels)
 
-    person_["age_bins"] = (
-        person_["age_bins"].astype(str).str.lower().str.replace("nan", "90+")
-    )
+    # `bd.cut` is equivalent to `pd.cut()` for pandas and calls our custom `cut`
+    # implementation for koalas.
+    person["age_bins"] = bd.cut(person["age"], bins=bins, labels=labels)
+
+    # `bd.cache` is equivalent to `person.spark.cache()` for koalas and is a
+    # no-op for pandas.
+    # Caching the intermediate result lets the transformations that run on top
+    # of it perform faster.
+    # TODO: try to remove it and check perfs.
+    bd.cache(person)
 
-    person_ = person_.loc[person_["gender_source_value"].isin(["m", "f"])]
-    group_gender_age = person_.groupby(["gender_source_value", "age_bins"])[
-        "person_id"
-    ].count()
+    group = person.groupby(["gender_source_value", "age_bins"])["person_id"].count()
 
     # Convert to pandas to ease plotting.
-    # Since we have aggregated the data, this operation won't crash.
-    group_gender_age = bd.to_pandas(group_gender_age)
+    group = bd.to_pandas(group)
+
+    group = group.to_frame().reset_index()
+    group["age_bins"] = (
+        group["age_bins"].astype(str).str.lower().str.replace("nan", "90+")
+    )
 
-    male = group_gender_age["m"].reset_index()
-    female = group_gender_age["f"].reset_index()
+    male = group.loc[group["gender_source_value"] == "m"].reset_index()
+    female = group.loc[group["gender_source_value"] == "f"].reset_index()
 
     left = (
         alt.Chart(male)
@@ -124,9 +149,9 @@ def plot_age_pyramid(
     if savefig:
         chart.save(filename)
         if return_vector:
-            return group_gender_age
+            return group
 
     if return_vector:
-        return chart, group_gender_age
+        return chart, group
 
     return chart
diff --git a/eds_scikit/utils/custom_implem/custom_implem.py b/eds_scikit/utils/custom_implem/custom_implem.py
index 81c5ae2a..55430eb9 100644
--- a/eds_scikit/utils/custom_implem/custom_implem.py
+++ b/eds_scikit/utils/custom_implem/custom_implem.py
@@ -13,6 +13,17 @@ class CustomImplem:
     All public facing methods must be stateless and defined as classmethods.
     """
 
+    @classmethod
+    def cache(cls, obj: DataFrame, backend=None) -> None:
+        """Call `obj.spark.cache()` for Koalas; do nothing for pandas."""
+        # Reject anything that is neither pandas nor Koalas up front.
+        if backend is not pd and backend is not ks:
+            raise ValueError(f"Unknown backend {backend}")
+        # Only Koalas needs an explicit Spark call; for pandas there is
+        # nothing to do.
+        if backend is ks:
+            obj.spark.cache()
+
     @classmethod
     def add_unique_id(
         cls,
@@ -27,9 +38,7 @@ def add_unique_id(
         elif backend is ks:
             return obj.koalas.attach_id_column(id_type="distributed", column=col_name)
         else:
-            raise NotImplementedError(
-                f"No method 'add_unique_id' is available for backend '{backend}'."
-            )
+            raise ValueError(f"Unknown backend {backend}")
 
     @classmethod
     def cut(
diff --git a/eds_scikit/utils/custom_implem/cut.py b/eds_scikit/utils/custom_implem/cut.py
index 85dbc2b5..7cc247c7 100644
--- a/eds_scikit/utils/custom_implem/cut.py
+++ b/eds_scikit/utils/custom_implem/cut.py
@@ -3,6 +3,7 @@
 import numpy as np
 import pandas as pd
 import pandas.core.algorithms as algos
+from databricks import koalas as ks
 from pandas import IntervalIndex, to_datetime, to_timedelta
 from pandas._libs import Timedelta, Timestamp
 from pandas._libs.lib import infer_dtype
@@ -371,7 +372,9 @@ def _bins_to_cuts(
     # hack to bypass "TypeError: 'Series' object does not support item assignment"
     ids = ids.to_frame()
     ids.loc[na_mask] = 0
-    ids = ids[ids.columns[0]]
+    ids.columns = ["key"]
+    ids["key"] -= 1
+    # ids = ids[ids.columns[0]]
 
     if labels:
         if not (labels is None or is_list_like(labels)):
@@ -400,17 +403,16 @@ def _bins_to_cuts(
                 ordered=ordered,
             )
 
-        label_mapping = dict(zip(range(len(labels)), labels))
+        labels = ks.DataFrame({"key": range(len(labels)), "val": labels})
         # x values outside of bins edges (i.e. when ids = 0) are mapped to NaN
-        result = (ids - 1).map(label_mapping)
-        result.fillna(np.nan, inplace=True)
+        result = ids.merge(labels, on="key", how="left")
+        # result = (ids - 1).map(label_mapping)
+        result = result["val"].fillna(np.nan)
 
     else:
-        result = ids - 1
         # hack to bypass "TypeError: 'Series' object does not support item assignment"
-        result = result.to_frame()
-        result.loc[na_mask] = np.nan
-        result = result[result.columns[0]]
+        ids.loc[na_mask] = np.nan
+        result = ids["key"]  # no labels: return the zero-based bin index itself
 
     return result, bins
 
diff --git a/tests/test_age_pyramid.py b/tests/test_age_pyramid.py
index 98b6c7f0..bd37118a 100644
--- a/tests/test_age_pyramid.py
+++ b/tests/test_age_pyramid.py
@@ -3,8 +3,9 @@
 
 import altair as alt
 import numpy as np
+import pandas as pd
 import pytest
-from pandas.core.series import Series
+from pandas.core.frame import DataFrame
 from pandas.testing import assert_frame_equal
 
 from eds_scikit.datasets.synthetic.person import load_person
@@ -12,25 +13,25 @@
 
 data = load_person()
 
+person_with_inclusion_date = data.person.copy()
+N = len(person_with_inclusion_date)
+delta_days = pd.to_timedelta(np.random.randint(0, 1000, N), unit="d")
 
-@pytest.mark.parametrize(
-    "datetime_ref",
-    [
-        None,
-        datetime(2020, 1, 1),
-        np.full(data.person.shape[0], datetime(2020, 1, 1)),
-    ],
+person_with_inclusion_date["inclusion_datetime"] = (
+    person_with_inclusion_date["birth_datetime"] + delta_days
 )
-def test_age_pyramid_datetime_ref_format(datetime_ref):
-    original_person = data.person.copy()
 
-    chart = plot_age_pyramid(
-        data.person, datetime_ref, savefig=False, return_vector=False
-    )
+
+@pytest.mark.parametrize(
+    "datetime_ref", [datetime(2020, 1, 1), "inclusion_datetime", "2020-01-01"]
+)
+def test_plot_age_pyramid(datetime_ref):
+    original_person = person_with_inclusion_date.copy()
+    chart = plot_age_pyramid(person_with_inclusion_date, datetime_ref, savefig=False)
     assert isinstance(chart, alt.vegalite.v4.api.ConcatChart)
 
     # Check that the data is unchanged
-    assert_frame_equal(original_person, data.person)
+    assert_frame_equal(original_person, person_with_inclusion_date)
 
 
 def test_age_pyramid_output():
@@ -44,21 +45,38 @@ def test_age_pyramid_output():
     group_gender_age = plot_age_pyramid(
         data.person, savefig=True, return_vector=True, filename=filename
     )
-    assert isinstance(group_gender_age, Series)
+    assert isinstance(group_gender_age, DataFrame)
 
     chart, group_gender_age = plot_age_pyramid(
         data.person, savefig=False, return_vector=True
     )
     assert isinstance(chart, alt.vegalite.v4.api.ConcatChart)
-    assert isinstance(group_gender_age, Series)
+    assert isinstance(group_gender_age, DataFrame)
 
     chart = plot_age_pyramid(data.person, savefig=False, return_vector=False)
     assert isinstance(chart, alt.vegalite.v4.api.ConcatChart)
 
     with pytest.raises(ValueError, match="You have to set a filename"):
-        _ = plot_age_pyramid(data.person, savefig=True, filename=None)
+        plot_age_pyramid(person_with_inclusion_date, savefig=True, filename=None)
 
     with pytest.raises(
         ValueError, match="'filename' type must be str, got <class 'list'>"
     ):
-        _ = plot_age_pyramid(data.person, savefig=True, filename=[1])
+        plot_age_pyramid(person_with_inclusion_date, savefig=True, filename=[1])
+
+
+def test_plot_age_pyramid_datetime_ref_error():
+    """Invalid `datetime_ref` values must be rejected with explicit errors."""
+    # A string that is neither a column of the person table nor a parseable date.
+    unparseable = "20x2-01-01"
+    with pytest.raises(
+        ValueError,
+        match="`datetime_ref` must either be a column name or parseable date, got string '20x2-01-01'",
+    ):
+        plot_age_pyramid(person_with_inclusion_date, unparseable, savefig=False)
+    # A value that is not None, a string or a datetime.
+    with pytest.raises(
+        TypeError,
+        match="`datetime_ref` must be either None, a parseable string date, a column name or a datetime. Got type: <class 'int'>, 2022",
+    ):
+        plot_age_pyramid(person_with_inclusion_date, 2022, savefig=False)