Skip to content

Commit

Permalink
adding condition_concept_codes feature
Browse files Browse the repository at this point in the history
  • Loading branch information
svittoz committed Oct 26, 2023
1 parent 8062883 commit d044ec9
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 16 deletions.
8 changes: 4 additions & 4 deletions edsteva/probes/biology/biology.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def compute_process(
("GLIMS_ANABIO", "ANABIO_ITM", "Mapped from"),
("ANABIO_ITM", "LOINC_ITM", "Maps to"),
],
concept_codes: Union[bool, List[str]] = None,
measurement_concept_codes: Union[bool, List[str]] = None,
concepts_sets: Union[str, Dict[str, str]] = {
"Leucocytes": "A0174|K3232|H6740|E4358|C9784|C8824|E6953",
"Plaquettes": "E4812|C0326|A1636|A0230|H6751|A1598|G7728|G7727|G7833|A2538|A2539|J4463",
Expand Down Expand Up @@ -147,7 +147,7 @@ def compute_process(
**EXAMPLE**: `[("ANALYSES_LABORATOIRE", "GLIMS_ANABIO", "Maps to")]`
concepts_sets: Union[str, Dict[str, str]] , optional
**EXAMPLE**: `{"Créatinine": "E3180|G1974|J1002|A7813|A0094|G1975|J1172|G7834|F9409|F9410|C0697|H4038|F2621", "Leucocytes": r"A0174|K3232|H6740|E4358|C9784|C8824|E6953"}`
concept_codes: Union[bool, List[str]], optional
measurement_concept_codes: Union[bool, List[str]], optional
**EXAMPLE**: `['E3180', 'G1974', 'J1002', 'A7813', 'A0094', 'G1975', 'J1172', 'G7834', 'F9409', 'F9410', 'C0697', 'H4038']`
care_site_ids : List[int], optional
**EXAMPLE**: `[8312056386, 8312027648]`
Expand Down Expand Up @@ -180,7 +180,7 @@ def compute_process(
"""
if not concepts_sets and "concepts_set" in self._index:
self._index.remove("concepts_set")
if not concept_codes:
if not measurement_concept_codes:
for terminology in self._standard_terminologies:
if "{}_concept_code".format(terminology) in self._index:
self._index.remove("{}_concept_code".format(terminology))
Expand Down Expand Up @@ -219,7 +219,7 @@ def compute_process(
care_site_ids=care_site_ids,
care_site_short_names=care_site_short_names,
care_site_specialties=care_site_specialties,
concept_codes=concept_codes,
measurement_concept_codes=measurement_concept_codes,
care_sites_sets=care_sites_sets,
specialties_sets=specialties_sets,
concepts_sets=concepts_sets,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def compute_completeness_predictor_per_measurement(
care_site_ids: List[int],
care_site_short_names: List[str],
care_site_specialties: Union[bool, List[str]],
concept_codes: Union[bool, List[str]],
measurement_concept_codes: Union[bool, List[str]],
care_sites_sets: Union[str, Dict[str, str]],
specialties_sets: Union[str, Dict[str, str]],
concepts_sets: Union[str, Dict[str, str]],
Expand Down Expand Up @@ -96,7 +96,7 @@ def compute_completeness_predictor_per_measurement(
measurement = prepare_measurement(
data=data,
biology_relationship=biology_relationship,
concept_codes=concept_codes,
measurement_concept_codes=measurement_concept_codes,
concepts_sets=concepts_sets,
start_date=start_date,
end_date=end_date,
Expand Down
4 changes: 2 additions & 2 deletions edsteva/probes/biology/completeness_predictors/per_visit.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def compute_completeness_predictor_per_visit(
care_site_ids: List[int],
care_site_short_names: List[str],
care_site_specialties: Union[bool, List[str]],
concept_codes: Union[bool, List[str]],
measurement_concept_codes: Union[bool, List[str]],
care_sites_sets: Union[str, Dict[str, str]],
specialties_sets: Union[str, Dict[str, str]],
concepts_sets: Union[str, Dict[str, str]],
Expand Down Expand Up @@ -130,7 +130,7 @@ def compute_completeness_predictor_per_visit(
measurement = prepare_measurement(
data=data,
biology_relationship=biology_relationship,
concept_codes=concept_codes,
measurement_concept_codes=measurement_concept_codes,
concepts_sets=concepts_sets,
root_terminology=root_terminology,
standard_terminologies=standard_terminologies,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def compute_completeness_predictor_per_condition(
specialties_sets: Union[str, Dict[str, str]],
diag_types: Union[bool, str, Dict[str, str]],
condition_types: Union[bool, str, Dict[str, str]],
condition_concept_codes: Union[bool, List[str]],
source_systems: Union[bool, List[str]],
length_of_stays: List[float],
age_ranges: List[int],
Expand Down Expand Up @@ -96,6 +97,7 @@ def compute_completeness_predictor_per_condition(
source_systems=source_systems,
diag_types=diag_types,
condition_types=condition_types,
condition_concept_codes=condition_concept_codes,
start_date=start_date,
end_date=end_date,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def compute_completeness_predictor_per_visit(
specialties_sets: Union[str, Dict[str, str]],
diag_types: Union[bool, str, Dict[str, str]],
condition_types: Union[bool, str, Dict[str, str]],
condition_concept_codes: Union[bool, List[str]],
source_systems: Union[bool, List[str]],
length_of_stays: List[float],
age_ranges: List[int],
Expand Down
7 changes: 7 additions & 0 deletions edsteva/probes/condition/condition.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(
"age_range",
"drg_source",
"gender_source_value",
"condition_source_value",
]
super().__init__(
completeness_predictor=completeness_predictor,
Expand All @@ -71,6 +72,7 @@ def compute_process(
extra_data: Data = None,
diag_types: Union[bool, str, Dict[str, str]] = None,
condition_types: Union[bool, str, Dict[str, str]] = None,
condition_concept_codes: Union[str, List[str]] = None,
source_systems: Union[bool, List[str]] = ["ORBIS"],
care_site_ids: List[int] = None,
care_site_short_names: List[str] = None,
Expand Down Expand Up @@ -103,6 +105,8 @@ def compute_process(
**EXAMPLE**: `{"All": ".*"}` or `{"All": ".*", "DP\DR": "DP|DR"}` or `"DP"`
condition_types: Union[bool, str, Dict[str, str]], optional
**EXAMPLE**: `{"All": ".*"}` or `{"All": ".*", "Pulmonary_embolism": "I26"}`
condition_concept_codes: Union[bool, List[str]], optional
**EXAMPLE**: `['E3180', 'G1974', 'J1002', 'A7813', 'A0094', 'G1975', 'J1172', 'G7834', 'F9409', 'F9410', 'C0697', 'H4038']`
source_systems: Union[bool, List[str]], optional
**EXAMPLE**: `["AREM", "ORBIS"]`
care_site_ids : List[int], optional
Expand Down Expand Up @@ -136,6 +140,8 @@ def compute_process(
self._index.remove("diag_type")
if not condition_types and "condition_type" in self._index:
self._index.remove("condition_type")
if not condition_concept_codes and "condition_source_value" in self._index:
self._index.remove("condition_source_value")
if not source_systems and "source_system" in self._index:
self._index.remove("source_system")
if not care_site_levels and "care_site_level" in self._index:
Expand Down Expand Up @@ -177,6 +183,7 @@ def compute_process(
provenance_sources=provenance_sources,
length_of_stays=length_of_stays,
condition_types=condition_types,
condition_concept_codes=condition_concept_codes,
source_systems=source_systems,
stay_sources=stay_sources,
drg_sources=drg_sources,
Expand Down
18 changes: 10 additions & 8 deletions edsteva/probes/utils/prepare_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def prepare_visit_occurrence(
def prepare_measurement(
data: Data,
biology_relationship: pd.DataFrame,
concept_codes: Union[bool, List[str]],
measurement_concept_codes: Union[bool, List[str]],
concepts_sets: Union[str, Dict[str, str]],
root_terminology: str,
standard_terminologies: List[str],
Expand Down Expand Up @@ -180,13 +180,13 @@ def prepare_measurement(
end_date=end_date,
)

if concept_codes and isinstance(concept_codes, list):
if measurement_concept_codes and isinstance(measurement_concept_codes, list):
measurement_by_terminology = []
for standard_terminology in standard_terminologies:
measurement_by_terminology.append(
measurement[
measurement["{}_concept_code".format(standard_terminology)].isin(
concept_codes
measurement_concept_codes
)
]
)
Expand Down Expand Up @@ -215,6 +215,7 @@ def prepare_condition_occurrence(
source_systems: Union[bool, List[str]],
diag_types: Union[bool, str, Dict[str, str]],
condition_types: Union[bool, str, Dict[str, str]],
condition_concept_code: Union[bool, List[str]] = None,
start_date: datetime = None,
end_date: datetime = None,
):
Expand Down Expand Up @@ -278,16 +279,17 @@ def prepare_condition_occurrence(
target_col="diag_type",
)

# Filter conditions
condition_occurrence = condition_occurrence.rename(
columns={"condition_source_value": "condition_type"}
)
if condition_concept_code and isinstance(condition_concept_code, list):
condition_occurrence = condition_occurrence[
condition_occurrence.condition_source_value.isin(condition_concept_code)
]

if condition_types and isinstance(condition_types, (dict, str)):
condition_occurrence = filter_table_by_type(
table=condition_occurrence,
table_name="condition_occurrence",
type_groups=condition_types,
source_col="condition_type",
source_col="condition_source_value",
target_col="condition_type",
)

Expand Down

0 comments on commit d044ec9

Please sign in to comment.