Merge pull request #19 from jenna-tomkinson/process_features

Process CellProfiler Extracted Features
WayScience · Jul 19, 2023 · ab9aede · ab9aede
2 parents 1de2e80 + fb58cae
commit ab9aede
Show file tree

Hide file tree

Showing 33 changed files with 1,261 additions and 778 deletions.
diff --git a/...alysis_output/Plate_1/nf1_analysis.sqlite → ...utput/Plate_1/Plate_1_nf1_analysis.sqlite b/...alysis_output/Plate_1/nf1_analysis.sqlite → ...utput/Plate_1/Plate_1_nf1_analysis.sqlite
diff --git a/...alysis_output/Plate_2/nf1_analysis.sqlite → ...utput/Plate_2/Plate_2_nf1_analysis.sqlite b/...alysis_output/Plate_2/nf1_analysis.sqlite → ...utput/Plate_2/Plate_2_nf1_analysis.sqlite
diff --git a/...alysis_output/Plate_3/nf1_analysis.sqlite → ...utput/Plate_3/Plate_3_nf1_analysis.sqlite b/...alysis_output/Plate_3/nf1_analysis.sqlite → ...utput/Plate_3/Plate_3_nf1_analysis.sqlite
diff --git a/..._output/Plate_3_prime/nf1_analysis.sqlite → ...3_prime/Plate_3_prime_nf1_analysis.sqlite b/..._output/Plate_3_prime/nf1_analysis.sqlite → ...3_prime/Plate_3_prime_nf1_analysis.sqlite
diff --git a/...alysis_output/Plate_4/nf1_analysis.sqlite → ...utput/Plate_4/Plate_4_nf1_analysis.sqlite b/...alysis_output/Plate_4/nf1_analysis.sqlite → ...utput/Plate_4/Plate_4_nf1_analysis.sqlite
diff --git a/2.cellprofiler_analysis/nf1_analysis.sh b/2.cellprofiler_analysis/nf1_analysis.sh
@@ -5,7 +5,9 @@ conda init bash
 # activate the main conda environment
 conda activate nf1_cellpainting_data
 
-# convert the notebook into a python and run the file
-jupyter nbconvert --to python \
-        --FilesWriter.build_directory=scripts/ \
-        --execute nf1_analysis.ipynb
+# convert all notebooks to python files into the scripts folder
+jupyter nbconvert --to python --output-dir=scripts/ *.ipynb
+
+# run the python scripts in order (CellProfiler analysis then rename SQLite files)
+python scripts/nf1_analysis.py
+python scripts/rename_sqlite_files.py
diff --git a/2.cellprofiler_analysis/rename_sqlite_files.ipynb b/2.cellprofiler_analysis/rename_sqlite_files.ipynb
@@ -0,0 +1,108 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Rename SQLite files from each plate folder to include plate as the prefix\n",
+    "\n",
+    "Due to the name of the SQLite file being hardcoded into the pipeline, the work-around when using `CellProfiler Parallel` is to output the SQLite files into folders with the plate name as to avoid conflicts. The files are renamed after analysis to include the plate prefix."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pathlib"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set path to directory with CellProfiler output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# directory where SQLite files are located in folders per plate\n",
+    "sqlite_dir = pathlib.Path(\"../2.cellprofiler_analysis/analysis_output/\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Add plate prefix to all SQLite files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Plate name prefix has been added to ../2.cellprofiler_analysis/analysis_output/Plate_4/nf1_analysis.sqlite. The new name is ../2.cellprofiler_analysis/analysis_output/Plate_4/Plate_4_nf1_analysis.sqlite.\n",
+      "Plate name prefix has been added to ../2.cellprofiler_analysis/analysis_output/Plate_1/nf1_analysis.sqlite. The new name is ../2.cellprofiler_analysis/analysis_output/Plate_1/Plate_1_nf1_analysis.sqlite.\n",
+      "Plate name prefix has been added to ../2.cellprofiler_analysis/analysis_output/Plate_3/nf1_analysis.sqlite. The new name is ../2.cellprofiler_analysis/analysis_output/Plate_3/Plate_3_nf1_analysis.sqlite.\n",
+      "Plate name prefix has been added to ../2.cellprofiler_analysis/analysis_output/Plate_2/nf1_analysis.sqlite. The new name is ../2.cellprofiler_analysis/analysis_output/Plate_2/Plate_2_nf1_analysis.sqlite.\n",
+      "Plate name prefix has been added to ../2.cellprofiler_analysis/analysis_output/Plate_3_prime/nf1_analysis.sqlite. The new name is ../2.cellprofiler_analysis/analysis_output/Plate_3_prime/Plate_3_prime_nf1_analysis.sqlite.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# iterate through all folders in directory to get paths to each SQLite file\n",
+    "for file_path in sqlite_dir.rglob('*.sqlite'):\n",
+    "    # if the SQLite files already start with `Plate`, then the file has already been renamed\n",
+    "    if str(file_path).startswith(\"Plate\"):\n",
+    "        print(f\"{file_path.name} already has the `Plate` prefix, which means it was already corrected.\")\n",
+    "        continue\n",
+    "    # create new file name where the folder name is included as the prefix\n",
+    "    new_file_name = f\"{file_path.parent.name}_{file_path.name}\"\n",
+    "    # create a new path with the new name\n",
+    "    new_path = file_path.with_name(new_file_name)\n",
+    "    # rename all SQLite files by using the new path\n",
+    "    file_path.rename(new_path)\n",
+    "    print(f\"Plate name prefix has been added to {file_path}. The new name is {new_path}.\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "nf1_cellpainting_data",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.15"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/2.cellprofiler_analysis/scripts/rename_sqlite_files.py b/2.cellprofiler_analysis/scripts/rename_sqlite_files.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# # Rename SQLite files from each plate folder to include plate as the prefix
+# 
+# Due to the name of the SQLite file being hardcoded into the pipeline, the work-around when using `CellProfiler Parallel` is to output the SQLite files into folders with the plate name as to avoid conflicts. The files are renamed after analysis to include the plate prefix.
+
+# ## Import libraries
+
+# In[1]:
+
+
+import pathlib
+
+
+# ## Set path to directory with CellProfiler output
+
+# In[2]:
+
+
+# directory where SQLite files are located in folders per plate
+sqlite_dir = pathlib.Path("../2.cellprofiler_analysis/analysis_output/")
+
+
+# ## Add plate prefix to all SQLite files
+
+# In[3]:
+
+
+# iterate through all folders in directory to get paths to each SQLite file
+for file_path in sqlite_dir.rglob('*.sqlite'):
+    # if the SQLite files already start with `Plate`, then the file has already been renamed
+    if str(file_path).startswith("Plate"):
+        print(f"{file_path.name} already has the `Plate` prefix, which means it was already corrected.")
+        continue
+    # create new file name where the folder name is included as the prefix
+    new_file_name = f"{file_path.parent.name}_{file_path.name}"
+    # create a new path with the new name
+    new_path = file_path.with_name(new_file_name)
+    # rename all SQLite files by using the new path
+    file_path.rename(new_path)
+    print(f"Plate name prefix has been added to {file_path}. The new name is {new_path}.")
+