Skip to content

Commit

Permalink
Merge pull request #19 from jenna-tomkinson/process_features
Browse files Browse the repository at this point in the history
Process CellProfiler Extracted Features
  • Loading branch information
jenna-tomkinson authored Jul 19, 2023
2 parents 1de2e80 + fb58cae commit ab9aede
Show file tree
Hide file tree
Showing 33 changed files with 1,261 additions and 778 deletions.
10 changes: 6 additions & 4 deletions 2.cellprofiler_analysis/nf1_analysis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ conda init bash
# activate the main conda environment
conda activate nf1_cellpainting_data

# convert the notebook into a python and run the file
jupyter nbconvert --to python \
--FilesWriter.build_directory=scripts/ \
--execute nf1_analysis.ipynb
# convert all notebooks to python files into the scripts folder
jupyter nbconvert --to python --output-dir=scripts/ *.ipynb

# run the python scripts in order (CellProfiler analysis then rename SQLite files)
python scripts/nf1_analysis.py
python scripts/rename_sqlite_files.py
108 changes: 108 additions & 0 deletions 2.cellprofiler_analysis/rename_sqlite_files.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Rename SQLite files from each plate folder to include plate as the prefix\n",
"\n",
"Due to the name of the SQLite file being hardcoded into the pipeline, the work-around when using `CellProfiler Parallel` is to output the SQLite files into folders with the plate name as to avoid conflicts. The files are renamed after analysis to include the plate prefix."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pathlib"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set path to directory with CellProfiler output"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# directory where SQLite files are located in folders per plate\n",
"sqlite_dir = pathlib.Path(\"../2.cellprofiler_analysis/analysis_output/\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Add plate prefix to all SQLite files"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Plate name prefix has been added to ../2.cellprofiler_analysis/analysis_output/Plate_4/nf1_analysis.sqlite. The new name is ../2.cellprofiler_analysis/analysis_output/Plate_4/Plate_4_nf1_analysis.sqlite.\n",
"Plate name prefix has been added to ../2.cellprofiler_analysis/analysis_output/Plate_1/nf1_analysis.sqlite. The new name is ../2.cellprofiler_analysis/analysis_output/Plate_1/Plate_1_nf1_analysis.sqlite.\n",
"Plate name prefix has been added to ../2.cellprofiler_analysis/analysis_output/Plate_3/nf1_analysis.sqlite. The new name is ../2.cellprofiler_analysis/analysis_output/Plate_3/Plate_3_nf1_analysis.sqlite.\n",
"Plate name prefix has been added to ../2.cellprofiler_analysis/analysis_output/Plate_2/nf1_analysis.sqlite. The new name is ../2.cellprofiler_analysis/analysis_output/Plate_2/Plate_2_nf1_analysis.sqlite.\n",
"Plate name prefix has been added to ../2.cellprofiler_analysis/analysis_output/Plate_3_prime/nf1_analysis.sqlite. The new name is ../2.cellprofiler_analysis/analysis_output/Plate_3_prime/Plate_3_prime_nf1_analysis.sqlite.\n"
]
}
],
"source": [
"# iterate through all folders in directory to get paths to each SQLite file\n",
"for file_path in sqlite_dir.rglob('*.sqlite'):\n",
" # if the SQLite files already start with `Plate`, then the file has already been renamed\n",
" if str(file_path).startswith(\"Plate\"):\n",
" print(f\"{file_path.name} already has the `Plate` prefix, which means it was already corrected.\")\n",
" continue\n",
" # create new file name where the folder name is included as the prefix\n",
" new_file_name = f\"{file_path.parent.name}_{file_path.name}\"\n",
" # create a new path with the new name\n",
" new_path = file_path.with_name(new_file_name)\n",
" # rename all SQLite files by using the new path\n",
" file_path.rename(new_path)\n",
" print(f\"Plate name prefix has been added to {file_path}. The new name is {new_path}.\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "nf1_cellpainting_data",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.15"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
43 changes: 43 additions & 0 deletions 2.cellprofiler_analysis/scripts/rename_sqlite_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env python
# coding: utf-8

# # Rename SQLite files from each plate folder to include plate as the prefix
#
# Due to the name of the SQLite file being hardcoded into the pipeline, the work-around when using `CellProfiler Parallel` is to output the SQLite files into folders with the plate name as to avoid conflicts. The files are renamed after analysis to include the plate prefix.

# ## Import libraries

# In[1]:


import pathlib


# ## Set path to directory with CellProfiler output

# In[2]:


# directory where SQLite files are located in folders per plate
sqlite_dir = pathlib.Path("../2.cellprofiler_analysis/analysis_output/")


# ## Add plate prefix to all SQLite files

# In[3]:


# iterate through all folders in directory to get paths to each SQLite file
for file_path in sqlite_dir.rglob('*.sqlite'):
# if the SQLite files already start with `Plate`, then the file has already been renamed
if str(file_path).startswith("Plate"):
print(f"{file_path.name} already has the `Plate` prefix, which means it was already corrected.")
continue
# create new file name where the folder name is included as the prefix
new_file_name = f"{file_path.parent.name}_{file_path.name}"
# create a new path with the new name
new_path = file_path.with_name(new_file_name)
# rename all SQLite files by using the new path
file_path.rename(new_path)
print(f"Plate name prefix has been added to {file_path}. The new name is {new_path}.")

Loading

0 comments on commit ab9aede

Please sign in to comment.