Skip to content

Commit

Permalink
test run
Browse files (browse the repository at this point in the history)
  • Loading branch information
janursa committed Nov 26, 2024
1 parent 94c79b7 commit b1a4b0c
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 25 deletions.
68 changes: 47 additions & 21 deletions runs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,23 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 7,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"upload: resources_test/datasets_raw/op_perturbation_sc_counts.h5ad to s3://openproblems-data/resources_test/grn/datasets_raw/op_perturbation_sc_counts.h5ad\n",
"upload: resources_test/datasets_raw/op_multiome_sc_counts.h5ad to s3://openproblems-data/resources_test/grn/datasets_raw/op_multiome_sc_counts.h5ad\n",
"upload: resources_test/inference_datasets/op_rna.h5ad to s3://openproblems-data/resources_test/grn/inference_datasets/op_rna.h5ad\n",
"upload: resources_test/evaluation_datasets/op_perturbation.h5ad to s3://openproblems-data/resources_test/grn/evaluation_datasets/op_perturbation.h5ad\n",
"upload: resources_test/inference_datasets/op_atac.h5ad to s3://openproblems-data/resources_test/grn/inference_datasets/op_atac.h5ad\n"
]
}
],
"source": [
"# !aws s3 sync resources_test/ s3://openproblems-data/resources_test/grn/ --delete\n",
"!aws s3 sync resources_test/ s3://openproblems-data/resources_test/grn/ --delete\n",
"# !aws s3 sync resources/ s3://openproblems-data/resources/grn/ --delete"
]
},
Expand All @@ -43,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -81,11 +93,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"AnnData object with n_obs × n_vars = 25551 × 22787\n",
" obs: 'cell_type', 'donor_id'\n",
" var: 'gene_ids', 'interval'\n",
" layers: 'X_norm', 'counts'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ad.read('resources/')"
"ad.read('resources/inference_datasets/op_rna.h5ad')"
]
},
{
Expand Down Expand Up @@ -144,14 +170,14 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Submitted batch job 7845525\n"
"Submitted batch job 7849254\n"
]
}
],
Expand Down Expand Up @@ -195,7 +221,7 @@
}
],
"source": [
"!ls output/temp/op/"
"!ls resources/scores/op/"
]
},
{
Expand Down Expand Up @@ -578,7 +604,7 @@
}
],
"source": [
"df_scores = pd.read_csv(f\"output/temp/op/50000-skeleton_False-binarize_True-ridge.csv\", index_col=0)\n",
"df_scores = pd.read_csv(f\"resources/scores/op/50000-skeleton_False-binarize_True-ridge.csv\", index_col=0)\n",
"# df_scores[df_scores<0] = 0\n",
"df_all_n = (df_scores-df_scores.min(axis=0))/(df_scores.max(axis=0)-df_scores.min(axis=0))\n",
"df_scores['rank'] = df_all_n.mean(axis=1).rank(ascending=False).astype(int)\n",
Expand Down Expand Up @@ -935,7 +961,7 @@
}
],
"source": [
"df_scores = pd.read_csv(f\"output/temp/op/50000-skeleton_False-binarize_True-GB.csv\", index_col=0)\n",
"df_scores = pd.read_csv(f\"resources/scores/op/50000-skeleton_False-binarize_True-GB.csv\", index_col=0)\n",
"# df_scores[df_scores<0] = 0\n",
"df_all_n = (df_scores-df_scores.min(axis=0))/(df_scores.max(axis=0)-df_scores.min(axis=0))\n",
"df_scores['rank'] = df_all_n.mean(axis=1).rank(ascending=False).astype(int)\n",
Expand Down Expand Up @@ -1164,7 +1190,7 @@
}
],
"source": [
"df_scores = pd.read_csv(f\"output/temp/replogle2/50000-skeleton_False-binarize_True-ridge.csv\", index_col=0)\n",
"df_scores = pd.read_csv(f\"resources/scores/replogle2/50000-skeleton_False-binarize_True-ridge.csv\", index_col=0)\n",
"# df_scores[df_scores<0] = 0\n",
"\n",
"df_scores_f = df_scores[['static-theta-0.0', 'static-theta-0.5', 'static-theta-1.0']]\n",
Expand Down Expand Up @@ -1403,7 +1429,7 @@
}
],
"source": [
"df_scores = pd.read_csv(f\"output/temp/nakatake/50000-skeleton_False-binarize_True-ridge.csv\", index_col=0)\n",
"df_scores = pd.read_csv(f\"resources/scores/nakatake/50000-skeleton_False-binarize_True-ridge.csv\", index_col=0)\n",
"# df_scores[df_scores<0] = 0\n",
"df_scores_f = df_scores[['static-theta-0.0', 'static-theta-0.5', 'static-theta-1.0']]\n",
"df_all_n = (df_scores_f-df_scores_f.min(axis=0))/(df_scores_f.max(axis=0)-df_scores_f.min(axis=0))\n",
Expand Down Expand Up @@ -1649,7 +1675,7 @@
}
],
"source": [
"df_scores = pd.read_csv(f\"output/temp/norman/X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\", index_col=0)\n",
"df_scores = pd.read_csv(f\"resources/scores/norman/X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\", index_col=0)\n",
"# df_scores[df_scores<0] = 0\n",
"df_scores_f = df_scores[['static-theta-0.0', 'static-theta-0.5', 'static-theta-1.0']]\n",
"df_all_n = (df_scores_f-df_scores_f.min(axis=0))/(df_scores_f.max(axis=0)-df_scores_f.min(axis=0))\n",
Expand Down Expand Up @@ -1684,7 +1710,7 @@
}
],
"source": [
"!ls output/temp/adamson/"
"!ls resources/scores/adamson/"
]
},
{
Expand Down Expand Up @@ -1884,7 +1910,7 @@
}
],
"source": [
"df_scores = pd.read_csv(f\"output/temp/adamson/X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\", index_col=0)\n",
"df_scores = pd.read_csv(f\"resources/scores/adamson/X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\", index_col=0)\n",
"# df_scores[df_scores<0] = 0\n",
"df_scores_f = df_scores[['static-theta-0.0', 'static-theta-0.5', 'static-theta-1.0']]\n",
"df_all_n = (df_scores_f-df_scores_f.min(axis=0))/(df_scores_f.max(axis=0)-df_scores_f.min(axis=0))\n",
Expand Down Expand Up @@ -1930,7 +1956,7 @@
}
],
"source": [
"!ls output/temp/op/"
"!ls resources/scores/op/"
]
},
{
Expand Down Expand Up @@ -2511,8 +2537,8 @@
}
],
"source": [
"df_scores_gb = pd.read_csv(f\"output/temp/op/X_norm-50000-skeleton_False-binarize_True-ridge-global-True.csv\", index_col=0)\n",
"df_scores = pd.read_csv(f\"output/temp/op/X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\", index_col=0)\n",
"df_scores_gb = pd.read_csv(f\"resources/scores/op/X_norm-50000-skeleton_False-binarize_True-ridge-global-True.csv\", index_col=0)\n",
"df_scores = pd.read_csv(f\"resources/scores/op/X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\", index_col=0)\n",
"\n",
"df_scores = pd.concat([df_scores, df_scores_gb])\n",
"# df_scores[df_scores<0] = 0\n",
Expand Down Expand Up @@ -2580,7 +2606,7 @@
"source": [
"# - collect all the scores\n",
"for i, dataset in enumerate(datasets):\n",
" df_scores = pd.read_csv(f\"output/temp/{dataset}/X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\", index_col=0)\n",
" df_scores = pd.read_csv(f\"resources/scores/{dataset}/X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\", index_col=0)\n",
" # - normalize scores \n",
" df_scores = df_scores.fillna(0)\n",
" df_scores[df_scores < 0] = 0\n",
Expand Down
4 changes: 2 additions & 2 deletions src/metrics/script_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def define_par(dataset):
global_models = False

# - run metrics
for dataset in ['adamson']: #'op', 'replogle2', 'nakatake', 'norman', 'adamson'
for dataset in ['op']: #'op', 'replogle2', 'nakatake', 'norman', 'adamson'
print('------ ', dataset, '------')
par = define_par(dataset)
os.makedirs(par['scores_dir'], exist_ok=True)
Expand All @@ -78,7 +78,7 @@ def define_par(dataset):
par['binarize'] = binarize
for max_n_links in [50000]:
par['max_n_links'] = max_n_links
for apply_skeleton in [False]:
for apply_skeleton in [True]:
par['apply_skeleton'] = apply_skeleton
# - determines models to run
grn_files_dict = {}
Expand Down
6 changes: 4 additions & 2 deletions src/process_data/multiomics/format_data/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
multiomics.X = multiomics.layers['counts']
del multiomics.layers
multiomics.layers['counts'] = multiomics.X.copy()
X_norm = sc.pp.normalize_total(multiomics, inplace=False)['X']
multiomics.layers['X_norm'] = sc.pp.log1p(X_norm, copy=True)

multiomics.var.index.name='location'
multiomics.obs.index.name='obs_id'
Expand Down Expand Up @@ -58,5 +56,9 @@
multiomics_rna.obs['donor_id'] = multiomics_rna.obs['donor_id'].map(donor_map)
multiomics_atac.obs['donor_id'] = multiomics_atac.obs['donor_id'].map(donor_map)

# normalize rna
X_norm = sc.pp.normalize_total(multiomics_rna, inplace=False)['X']
multiomics_rna.layers['X_norm'] = sc.pp.log1p(X_norm, copy=True)

multiomics_rna.write(par['multiomics_rna'])
multiomics_atac.write(par['multiomics_atac'])

0 comments on commit b1a4b0c

Please sign in to comment.