Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LOT use case #145

Merged
merged 26 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
80fb506
ReadMe file for creating dataset from MIMICIII, and training LOS>3 cl…
fazelehh Sep 11, 2024
2a65edb
adding links to readMe file
fazelehh Sep 11, 2024
22b8275
Merge branch 'main' of github.com:aidotse/LeakPro into 113-mimic-iii-…
fazelehh Sep 13, 2024
b1c9331
fixing the LR model
fazelehh Sep 20, 2024
5abcbda
Mimic_Hnadler, data_processeing, model and the main file
fazelehh Oct 8, 2024
5178e32
all attacks are running
fazelehh Oct 8, 2024
deaa51b
add relevant files from MIMIC_Extract and mimic_code repos
fazelehh Oct 10, 2024
a08b7c0
fix some bugs regarding nivduration
fazelehh Oct 11, 2024
0427b29
revert conflicting tabular_mia files
fazelehh Oct 11, 2024
c106d4e
Merge remote-tracking branch 'origin/main' into 113-mimic-iii-data-ha…
fazelehh Oct 14, 2024
f5a273e
adding subset to the mimicDataset
fazelehh Oct 16, 2024
3ae18fd
fixing loading the database and indices
fazelehh Oct 17, 2024
1f84b23
fixing physionet username, breaking the makefile in two commands.
fazelehh Nov 7, 2024
8fa7ece
fixing data and output folder in gitingnore
fazelehh Nov 7, 2024
76be345
adding gitkeep
fazelehh Nov 7, 2024
827f76e
adding gitkeep
fazelehh Nov 7, 2024
cfa70ad
adding gitignore for data
fazelehh Nov 7, 2024
c84300c
adding gitkeep to output
fazelehh Nov 7, 2024
521e9b2
adding gitkeep to output
fazelehh Nov 7, 2024
3ae5d01
adding gitignore for output
fazelehh Nov 7, 2024
c4a6b9a
removing extra readme file
fazelehh Nov 7, 2024
61ff35e
update the readme file
fazelehh Nov 8, 2024
702f62a
fixing copying output to the correct directory
fazelehh Nov 13, 2024
d3c392d
Add .gitignore to ignore contents of the data folder
fazelehh Nov 21, 2024
9a7fb56
removing redundant files, fixind double sigmoid bug
fazelehh Nov 22, 2024
656450c
adding comments in the readmefile
fazelehh Nov 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions examples/mia/LOS/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Ignore everything inside the data folder
data/*

# But do not ignore the .gitkeep file
!data/.gitkeep
43 changes: 43 additions & 0 deletions examples/mia/LOS/audit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# LeakPro audit configuration: lists the attacks to run and where to find the target model and data.
audit: # Configurations for auditing
random_seed: 1234 # Integer specifying the random seed
attack_list:
# rmia:
# training_data_fraction: 0.5 # Fraction of the auxiliary dataset to use for this attack (in each shadow model training)
# attack_data_fraction: 0.5 # Fraction of auxiliary dataset to sample from during attack
# num_shadow_models: 8 # Number of shadow models to train
# online: True # perform online or offline attack
# temperature: 2
# gamma: 1.0
# offline_a: 0.33 # parameter from which we compute p(x) from p_OUT(x) such that p_IN(x) = a p_OUT(x) + b.
# offline_b: 0.66
# qmia:
# training_data_fraction: 1.0 # Fraction of the auxiliary dataset (data without train and test indices) to use for training the quantile regressor
# epochs: 5 # Number of training epochs for quantile regression
# population:
# attack_data_fraction: 1.0 # Fraction of the auxiliary dataset to use for this attack
lira:
training_data_fraction: 0.5 # Fraction of the auxiliary dataset to use for this attack (in each shadow model training)
num_shadow_models: 8 # Number of shadow models to train
online: True # perform online or offline attack
fixed_variance: True # Use a fixed variance for the whole audit
boosting: True
# loss_traj:
# training_distill_data_fraction : 0.7 # Fraction of the auxiliary dataset to use for training the distillation models D_s = (1-D_KD)/2
# number_of_traj: 10 # Number of epochs (number of points in the loss trajectory)
# label_only: False # True or False
# mia_classifier_epochs: 100

output_dir: "./leakpro_output"
attack_type: "mia" #mia, gia

target:
# Target model path
module_path: "utils/model.py"
model_class: "MimicLR"
# Data paths
target_folder: "./target"
data_path: "./data/dataset.pkl"

# No value is given for the two sections below.
shadow_model:

distillation_model:
Empty file added examples/mia/LOS/data/.gitkeep
Empty file.
65 changes: 65 additions & 0 deletions examples/mia/LOS/mimic_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@

import torch
from torch import cuda, device, optim, sigmoid
from torch.nn import BCELoss
from torch.utils.data import DataLoader
from tqdm import tqdm

from leakpro import AbstractInputHandler

class MimicInputHandler(AbstractInputHandler):
    """Input handler for the MIMIC length-of-stay (LOS) example.

    Supplies the loss function, the optimizer and the training loop used for
    the binary classification model of this example.
    """

    def __init__(self, configs: dict) -> None:
        """Forward the configuration dictionary to ``AbstractInputHandler``."""
        super().__init__(configs = configs)

    def get_criterion(self) -> BCELoss:
        """Return the binary cross-entropy loss used for training.

        ``BCELoss`` expects probabilities in [0, 1], so the model's forward
        pass must already apply the sigmoid.
        """
        return BCELoss()

    def get_optimizer(self, model: torch.nn.Module) -> optim.Optimizer:
        """Return an SGD optimizer (lr=0.1, momentum=0.8) over ``model``'s parameters."""
        learning_rate = 0.1
        momentum = 0.8
        return optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    def train(
        self,
        dataloader: DataLoader,
        model: torch.nn.Module = None,
        criterion: torch.nn.Module = None,
        optimizer: optim.Optimizer = None,
        epochs: int = None,
    ) -> dict:
        """Train ``model`` on ``dataloader`` for ``epochs`` epochs.

        Args:
            dataloader: Yields ``(data, target)`` batches; ``target`` is cast to
                float and given a trailing dimension to match the model output.
            model: Model to train; moved to CUDA when available, otherwise CPU.
            criterion: Ignored; the loss from ``get_criterion`` is always used.
            optimizer: Ignored; the optimizer from ``get_optimizer`` is always used.
            epochs: Number of passes over ``dataloader``.

        Returns:
            ``{"model": model, "metrics": {"accuracy": ..., "loss": ...}}`` where
            the metrics are those of the last epoch (0.0 when ``epochs`` is 0).

        """
        compute_device = device("cuda" if cuda.is_available() else "cpu")
        model.to(compute_device)
        model.train()

        # NOTE: the criterion/optimizer arguments are deliberately overridden,
        # as in the original implementation, so training is always consistent
        # with get_criterion/get_optimizer.
        criterion = self.get_criterion()
        optimizer = self.get_optimizer(model)

        # Bound up front so the return statement is valid even when epochs == 0.
        train_acc, train_loss = 0.0, 0.0

        for _ in tqdm(range(epochs), desc="Training Progress"):
            model.train()
            train_acc, train_loss = 0.0, 0.0

            for data, target in dataloader:
                target = target.float().unsqueeze(1)
                data = data.to(compute_device, non_blocking=True)
                target = target.to(compute_device, non_blocking=True)
                optimizer.zero_grad()
                output = model(data)

                loss = criterion(output, target)
                # The output is already a probability (BCELoss requires it), so
                # threshold it directly. Applying sigmoid again maps [0, 1] into
                # [0.5, 0.73] and would mark every sample as positive.
                pred = output >= 0.5
                train_acc += pred.eq(target).sum().item()

                loss.backward()
                optimizer.step()
                train_loss += loss.item()

            # Accuracy is per sample, loss is the mean over batches.
            train_acc = train_acc / len(dataloader.dataset)
            train_loss = train_loss / len(dataloader)

        return {"model": model, "metrics": {"accuracy": train_acc, "loss": train_loss}}
278 changes: 278 additions & 0 deletions examples/mia/LOS/mimic_main.ipynb

Large diffs are not rendered by default.

118 changes: 118 additions & 0 deletions examples/mia/LOS/mimiciii_prepration/MIMIC_Extract/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Ignore all contents of output
/output/*

# But do not ignore the .gitkeep file, so the otherwise-empty folder stays in the repository
!/output/.gitkeep


# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/


*.ipynb_checkpoints*
makejob

# tags
tags
21 changes: 21 additions & 0 deletions examples/mia/LOS/mimiciii_prepration/MIMIC_Extract/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 MLforHealth

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Collects the ICD-9 diagnosis codes of each ICU stay for the requested admissions.
-- Output: one row per (icustay_id, subject_id, hadm_id) with icd9_codes as an array ordered by seq_num.
-- NOTE(review): the quoted hadm_id placeholder in the WHERE clause is presumably substituted by the caller before execution - confirm.
SET SEARCH_PATH TO public,mimiciii;
SELECT
i.icustay_id, d.subject_id, d.hadm_id,
array_agg(d.icd9_code ORDER BY seq_num ASC) AS icd9_codes
FROM diagnoses_icd d
-- NOTE(review): no column of c is selected in this query, so this join only affects the result
-- if ccs_dx holds several rows per icd9_code (codes would then repeat in the array) - confirm it is still needed.
LEFT OUTER JOIN (SELECT ccs_matched_id, icd9_code from ccs_dx) c
ON c.icd9_code = d.icd9_code
-- Attach every ICU stay that belongs to the same admission and subject.
INNER JOIN icustays i
ON i.hadm_id = d.hadm_id AND i.subject_id = d.subject_id
-- Diagnoses without a sequence number are dropped because they cannot be ordered.
WHERE d.hadm_id IN ('{hadm_id}') AND seq_num IS NOT NULL
GROUP BY i.icustay_id, d.subject_id, d.hadm_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Debug variant of the diagnosis-code query: no admission filter, and the matched CCS codes are returned as well.
-- Output: one row per (icustay_id, subject_id, hadm_id) with icd9_codes and ccs_codes arrays, both ordered by seq_num.
\echo "DEBUG ONLY"
SET search_path TO mimiciii;
SELECT
i.icustay_id, d.subject_id, d.hadm_id,
array_agg(d.icd9_code ORDER BY seq_num ASC) AS icd9_codes,
array_agg(c.ccs_matched_id ORDER BY seq_num ASC) AS ccs_codes
FROM mimiciii.diagnoses_icd d
-- LEFT join keeps diagnoses that have no row in ccs_dx (their ccs_matched_id is then NULL).
LEFT OUTER JOIN (SELECT ccs_matched_id, icd9_code from mimiciii.ccs_dx) c
ON c.icd9_code = d.icd9_code
-- Attach every ICU stay that belongs to the same admission and subject.
INNER JOIN icustays i
ON i.hadm_id = d.hadm_id AND i.subject_id = d.subject_id

-- Diagnoses without a sequence number are dropped because they cannot be ordered.
WHERE seq_num IS NOT NULL
GROUP BY i.icustay_id, d.subject_id, d.hadm_id

Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
-- Debug query that builds one row of static (per ICU stay) attributes: demographics, admission details,
-- mortality flags, code status, severity scores (SOFA, SAPS II, OASIS) and a 30-day readmission flag.
\echo "This file is just for debugging"
SET search_path TO public,mimiciii;
select distinct
i.subject_id,
i.hadm_id,
i.icustay_id,
i.gender,
i.age as age,
i.ethnicity,
i.admission_type,
i.hospital_expire_flag,
i.hospstay_seq,
i.los_icu,
i.admittime,
i.dischtime,
i.intime,
i.outtime,
a.diagnosis AS diagnosis_at_admission,
a.insurance,
a.deathtime,
a.discharge_location,
-- Mortality flags: 1 when deathtime falls inside the ICU stay / the hospital admission, else 0.
CASE when a.deathtime between i.intime and i.outtime THEN 1 ELSE 0 END AS mort_icu,
CASE when a.deathtime between i.admittime and i.dischtime THEN 1 ELSE 0 END AS mort_hosp,
s.first_careunit,
c.fullcode_first,
c.dnr_first,
c.fullcode,
c.dnr,
-- c.timednr_chart,
c.dnr_first_charttime,
c.cmo_first,
c.cmo_last,
c.cmo,
c.cmo_ds,
-- c.timecmo_chart,
c.cmo_first_charttime,
-- c.timecmo_nursingnote,
c.cmo_nursingnote_charttime,
sofa.sofa,
-- NOTE(review): the six SOFA component columns below all share the alias sofa_, so they cannot be
-- told apart by name in the result set. Distinct aliases look intended - confirm before relying on them.
sofa.respiration as sofa_,
sofa.coagulation as sofa_,
sofa.liver as sofa_,
sofa.cardiovascular as sofa_,
sofa.cns as sofa_,
sofa.renal as sofa_,
sapsii.sapsii,
sapsii.sapsii_prob,
oasis.oasis,
oasis.oasis_prob,
-- Stays without a match in the readmission subquery get 0.
COALESCE(f.readmission_30, 0) AS readmission_30
FROM icustay_detail i
INNER JOIN admissions a ON i.hadm_id = a.hadm_id
INNER JOIN icustays s ON i.icustay_id = s.icustay_id
INNER JOIN code_status c ON i.icustay_id = c.icustay_id
-- Readmission subquery: flags stay d when the same subject has a stay c with a larger icustay_id that
-- starts at most 30 days after d ends, where c is the stay with the earliest outtime among the stays
-- starting after d ends.
LEFT OUTER JOIN (SELECT d.icustay_id, 1 as readmission_30
FROM icustays c, icustays d
WHERE c.subject_id=d.subject_id
AND c.icustay_id > d.icustay_id
AND c.intime - d.outtime <= interval '30 days'
AND c.outtime = (SELECT MIN(e.outtime) from icustays e
WHERE e.subject_id=c.subject_id
AND e.intime>d.outtime)) f
ON i.icustay_id=f.icustay_id
-- Severity scores are LEFT joined so stays without a score are kept (score columns are then NULL).
LEFT OUTER JOIN (SELECT icustay_id, sofa, respiration, coagulation, liver, cardiovascular, cns, renal
FROM sofa) sofa
ON i.icustay_id=sofa.icustay_id
LEFT OUTER JOIN (SELECT icustay_id, sapsii, sapsii_prob
FROM sapsii) sapsii
ON sapsii.icustay_id=i.icustay_id
LEFT OUTER JOIN (SELECT icustay_id, oasis, oasis_prob
FROM oasis) oasis
ON oasis.icustay_id=i.icustay_id
-- Cohort filters. The LIKE pattern has no wildcard, so only a first_careunit of exactly NICU is excluded.
WHERE s.first_careunit NOT like 'NICU'
and i.hadm_id is not null and i.icustay_id is not null
and i.hospstay_seq = 1
and i.icustay_seq = 1
and i.age >= 16
and i.los_icu >= 1
-- Keep ICU stays lasting between 12 and 250 hours (inclusive).
and (i.outtime >= (i.intime + interval '12 hours'))
and (i.outtime <= (i.intime + interval '250 hours'))
ORDER BY subject_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Selects note events for the requested admissions and subjects, paired with the ICU stays of the same admission.
-- NOTE(review): the quoted hadm_id and subject_id placeholders are presumably substituted by the caller before execution - confirm.
SELECT n.subject_id, n.hadm_id, i.icustay_id, n.chartdate, n.charttime, n.category, n.description, n.text
-- The join is on hadm_id only, so a note is repeated for every ICU stay of its admission.
FROM noteevents n INNER JOIN icustays i on i.hadm_id = n.hadm_id
WHERE
-- Keep only notes whose iserror column is NULL.
iserror IS NULL
-- Keep notes whose chart date or chart time is not after the end of the ICU stay.
AND (n.chartdate <= i.outtime OR n.charttime <= i.outtime)
AND n.hadm_id IN ('{hadm_id}')
AND n.subject_id IN ('{subject_id}')
Loading
Loading