Skip to content

Commit

Permalink
working fibers with pareto front optimizing bin size and log rank score, and covariate data generation
Browse files Browse the repository at this point in the history
  • Loading branch information
cattermelon1234 committed Aug 20, 2024
1 parent 48b393f commit 432b3ec
Show file tree
Hide file tree
Showing 9 changed files with 137,059 additions and 10,346 deletions.
140,774 changes: 136,458 additions & 4,316 deletions FIBERS_Survival_Demo.ipynb

Large diffs are not rendered by default.

5,937 changes: 0 additions & 5,937 deletions paper_analysis_codes/FIBERS_Survival_Covariates_New.ipynb

This file was deleted.

16 changes: 8 additions & 8 deletions src/skfibers/experiments/survival_covariates_sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def survival_data_simulation_covariates(instances=10000,total_features=100,predi

#P1_values = []
#P2_values = []
PC1_values = []
TC1_values = []
#PC2_values = [] #test
patient_censoring_times = []
administrative_censoring_times = []
Expand All @@ -48,12 +48,12 @@ def survival_data_simulation_covariates(instances=10000,total_features=100,predi
#P1 = int(random.random() < 0.3)
#P2 = int(random.random() < 0.3)

#PC1 = int(random.random() > 0.5 + recipient_factor/2 + donor_factor/2)
#TC1 = int(random.random() > 0.5 + recipient_factor/2 + donor_factor/2)
#if random.random() > 0.2:
# PC1 = int(random.random() > recipient_factor/2 + donor_factor/2)
# TC1 = int(random.random() > recipient_factor/2 + donor_factor/2)
#else:
# PC1 = int(random.random() > 0.5)
PC1 = int(random.random() > recipient_factor/2 + donor_factor/2)
# TC1 = int(random.random() > 0.5)
TC1 = int(random.random() > recipient_factor/2 + donor_factor/2)
#feature_frequency = random.uniform(feature_frequency_range[0], feature_frequency_range[1]) #test
#PC2 = int(random.random() < feature_frequency) #test

Expand Down Expand Up @@ -84,7 +84,7 @@ def survival_data_simulation_covariates(instances=10000,total_features=100,predi

#P1_values.append(P1)
#P2_values.append(P2)
PC1_values.append(PC1)
TC1_values.append(TC1)
#PC2_values.append(PC2) #test
patient_censoring_times.append(patient_censoring_time)
administrative_censoring_times.append(administrative_censoring_time)
Expand Down Expand Up @@ -118,7 +118,7 @@ def survival_data_simulation_covariates(instances=10000,total_features=100,predi

# Create a DataFrame to store the data
df = pd.DataFrame({
'PC_1': PC1_values,
'TC_1': TC1_values,
#'PC_2': PC2_values, #test
'C_1': recipient_factors,
'C_2': donor_factors,
Expand All @@ -134,7 +134,7 @@ def survival_data_simulation_covariates(instances=10000,total_features=100,predi


data = pd.DataFrame({
'PC_1': PC1_values,
'TC_1': TC1_values,
#'PC_2': PC2_values, #test
'C_1': recipient_factors,
'C_2': donor_factors,
Expand Down
4 changes: 2 additions & 2 deletions src/skfibers/experiments/survival_sim_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ def survival_data_simulation(instances=10000, total_features=100, predictive_fea
df.at[i,col] = 1

#for low risk instances, fill in predictive features
for i in range(hr_count,len(low_binary_list)): #for each unique binary combo for low risk
binary_string = low_binary_list[i]
for i in range(hr_count,hr_count + len(low_binary_list)): #for each unique binary combo for low risk
binary_string = low_binary_list[i - hr_count]
for col, value in zip(predictive_names, [int(bit) for bit in binary_string]):
df.at[i, col] = value

Expand Down
280 changes: 268 additions & 12 deletions src/skfibers/fibers.py

Large diffs are not rendered by default.

237 changes: 206 additions & 31 deletions src/skfibers/methods/bin.py

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions src/skfibers/methods/data_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
def prepare_data(df,outcome_label,censor_label,covariates):
# Make list of feature names (i.e. columns that are not outcome, censor, or covariates)
feature_names = list(df.columns)
if censor_label != None:
if covariates != None:
exclude = covariates + [outcome_label,censor_label]
else:
exclude = [outcome_label,censor_label]
Expand All @@ -14,6 +14,7 @@ def prepare_data(df,outcome_label,censor_label,covariates):
cols_to_drop = []
for col in feature_names:
if len(df[col].unique()) == 1:
print(df[col])
cols_to_drop.append(col)
df.drop(columns=cols_to_drop, inplace=True)
feature_names = [item for item in feature_names if item not in cols_to_drop]
Expand All @@ -26,7 +27,7 @@ def calculate_residuals(df,covariates,feature_names,outcome_label,censor_label):
# Fit a Cox proportional hazards model to the DataFrame
var_list = covariates+[outcome_label,censor_label]
logging.info("Fitting COX Model")
cph = CoxPHFitter()
cph = CoxPHFitter(penalizer=0.0001)
cph.fit(df.loc[:,var_list], duration_col=outcome_label, event_col=censor_label, show_progress=True)

# Calculate the residuals using the Schoenfeld residuals method
Expand Down
150 changes: 113 additions & 37 deletions src/skfibers/methods/population.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/skfibers/methods/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def plot_adj_HR_metric_product(residuals,bin_pop,show=True,save=False,output_fol


def cox_prop_hazard(bin_df, outcome_label, censor_label): #make bin variable beetween 0 and 1
cph = CoxPHFitter()
cph = CoxPHFitter(penalizer=0.0001)
cph.fit(bin_df,outcome_label,event_col=censor_label, show_progress=False)
return cph.summary

Expand Down

0 comments on commit 432b3ec

Please sign in to comment.