Skip to content

Commit

Permalink
Revert MIMIC preprocessing
Browse files (browse the repository at this point in the history)
  • Loading branch information
ivan-chai committed Jan 13, 2025
1 parent 0f3b7c4 commit 97b9c60
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 13 deletions.
7 changes: 1 addition & 6 deletions experiments/mimiciv/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,7 @@ docker cp esgpt:/var/lib/postgresql/MIMICIV_FMs_public/data/hotpp_cohort/dynamic
```
pip install git+https://github.com/xiyori/ICD-Mappings.git
```
-2. Download GEM mappings:
-```
-wget https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/2013/DiagnosisGEMs_2013.zip
-unzip DiagnosisGEMs_2013.zip -d data
-```
-3. Run data preparation script:
+2. Run data preparation script:
```
spark-submit --master 'local[8]' scripts/make-dataset.py
```
9 changes: 2 additions & 7 deletions experiments/mimiciv/scripts/make-dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.parquet as pq
import pandas as pd
from random import Random

import pyspark.sql.functions as F
Expand Down Expand Up @@ -57,17 +56,13 @@ def simplify_measurements(src, dst):

def map_codes(df):
"""Convert ICD 9 to ICD 10 chapters where possible."""
gem = pd.read_csv("data/2013_I9gem.txt", header=None, delimiter=r"\s+")
gem = gem.sort_values(by=2, axis=0).groupby(0).first().replace({1: {"NoDx": None}})[1]
gem_map = dict(gem)
mapper = Mapper()
def map_icd(name):
if name == "UNK":
return name
src, value = name.split()
if src == "ICD_9": # convert to ICD-10 value before converting to chapter
value = gem_map[value]
return "CH-" + mapper.map(value, source="icd10", target="chapter")
src = src.replace("_", "").lower()
return "CH-" + mapper.map(value, source=src, target="chapter")
icd_mapper = F.udf(map_icd, returnType=StringType())
df = df.withColumn("labels", icd_mapper(F.col("label"))).drop("label")
return df
Expand Down

0 comments on commit 97b9c60

Please sign in to comment.