WayScience · jenna-tomkinson · Feb 7, 2024 · Jan 31, 2024 · Feb 1, 2024 · Feb 1, 2024
diff --git a/1.cellprofiler_ic/README.md b/1.cellprofiler_ic/README.md
@@ -1,9 +1,28 @@
-# Perform illumination correction and save corrected images
+# Perform whole image quality control and illumination correction and save corrected images
 
-In this module, we perform illumination correction (IC) on images for each plate and save the corrected images into new folders. 
+In this module, we perform whole image quality control (QC) evalutation, illumination correction (IC) on images for each plate and save the corrected images into new folders. 
 Images are saved as 16-bit depth, which is the same as the raw data.
 
-## CellProfiler Pipeline
+## Whole image QC
+
+To remove poor quality images, we use the `MeasureImageQuality` module from CellProfiler to extract blur and saturation metrics and generate optimal thresholds for detection using the z-scoring method.
+
+Blur metrics will detect both out-of-focus and empty images.
+Saturation metrics will detect large artifacts or overly saturated/blown out channels.
+
+### Run the `image_quality_control` notebooks
+
+To process, evaluate, and generate a QC report for the NF1 data, you can run the below script to run the notebooks.
+
+```bash
+# Run this script in terminal
+# move to the 1.cellprofiler_ic directory to access the `sh` script
+cd 1.cellprofiler_ic
+# run the notebook as a python script
+source nf1_quality_control.sh
+```
+
+## Illumination Correction
 
 Due to there being differences in channel number between plates, we have two different illumination correction pipelines.
 
@@ -13,7 +32,7 @@ Due to there being differences in channel number between plates, we have two dif
 **Note:** The parameters for correction between pipelines in the same channel might be slightly different. 
 But, the parameters in both perform and output the best corrected images at this point
 
-## Run the `nf1_ic` notebook
+### Run the `nf1_ic` notebook
 
 To calculate and apply an IC function on each channel, run the [nf1_ic.ipynb](nf1_ic.ipynb) notebook as a python script using the code block below:
 

diff --git a/1.cellprofiler_ic/image_quality_control/0.whole_image_qc.ipynb b/1.cellprofiler_ic/image_quality_control/0.whole_image_qc.ipynb
@@ -0,0 +1,175 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Run whole image QC pipeline in CellProfiler\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pathlib\n",
+    "import pprint\n",
+    "\n",
+    "import sys\n",
+    "\n",
+    "sys.path.append(\"../../utils\")\n",
+    "import cp_parallel"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set paths and variables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['Plate_4', 'Plate_1', 'Plate_3', 'Plate_2', 'Plate_5', 'Plate_3_prime']\n",
+      "There are a total of 6 plates. The names of the plates are:\n",
+      "Plate_4\n",
+      "Plate_1\n",
+      "Plate_3\n",
+      "Plate_2\n",
+      "Plate_5\n",
+      "Plate_3_prime\n"
+     ]
+    }
+   ],
+   "source": [
+    "# set the run type for the parallelization\n",
+    "run_name = \"quality_control\"\n",
+    "\n",
+    "# set path for pipeline for illumination correction\n",
+    "path_to_pipeline = pathlib.Path(\"../pipelines/whole_image_qc.cppipe\").resolve(strict=True)\n",
+    "\n",
+    "# set main output dir for all plates if it doesn't exist\n",
+    "output_dir = pathlib.Path(\"./qc_results\")\n",
+    "output_dir.mkdir(exist_ok=True)\n",
+    "\n",
+    "# directory where images are located within folders\n",
+    "images_dir = pathlib.Path(\"../../0.download_data\")\n",
+    "\n",
+    "# list for plate names based on folders to use to create dictionary\n",
+    "plate_names = []\n",
+    "# iterate through 0.download_data and append plate names from folder names that contain image data from that plate\n",
+    "for file_path in images_dir.iterdir():\n",
+    "    if str(file_path.stem).startswith(\"Plate\"):\n",
+    "        plate_names.append(str(file_path.stem))\n",
+    "\n",
+    "print(plate_names)\n",
+    "print(\"There are a total of\", len(plate_names), \"plates. The names of the plates are:\")\n",
+    "for plate in plate_names:\n",
+    "    print(plate)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Generate dictionary with plate info to run CellProfiler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{   'Plate_3': {   'path_to_images': PosixPath('/home/jenna/nf1_cellpainting_data/0.download_data/Plate_3'),\n",
+      "                   'path_to_output': PosixPath('qc_results/Plate_3'),\n",
+      "                   'path_to_pipeline': PosixPath('/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/pipelines/whole_image_qc.cppipe')},\n",
+      "    'Plate_3_prime': {   'path_to_images': PosixPath('/home/jenna/nf1_cellpainting_data/0.download_data/Plate_3_prime'),\n",
+      "                         'path_to_output': PosixPath('qc_results/Plate_3_prime'),\n",
+      "                         'path_to_pipeline': PosixPath('/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/pipelines/whole_image_qc.cppipe')},\n",
+      "    'Plate_4': {   'path_to_images': PosixPath('/home/jenna/nf1_cellpainting_data/0.download_data/Plate_4'),\n",
+      "                   'path_to_output': PosixPath('qc_results/Plate_4'),\n",
+      "                   'path_to_pipeline': PosixPath('/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/pipelines/whole_image_qc.cppipe')},\n",
+      "    'Plate_5': {   'path_to_images': PosixPath('/home/jenna/nf1_cellpainting_data/0.download_data/Plate_5'),\n",
+      "                   'path_to_output': PosixPath('qc_results/Plate_5'),\n",
+      "                   'path_to_pipeline': PosixPath('/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/pipelines/whole_image_qc.cppipe')}}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# create plate info dictionary with all parts of the CellProfiler CLI command to run in parallel\n",
+    "plate_info_dictionary = {\n",
+    "    name: {\n",
+    "        \"path_to_images\": pathlib.Path(list(images_dir.rglob(name))[0]).resolve(\n",
+    "            strict=True\n",
+    "        ),\n",
+    "        \"path_to_output\": pathlib.Path(f\"{output_dir}/{name}\"),\n",
+    "        \"path_to_pipeline\": path_to_pipeline,\n",
+    "\n",
+    "    }\n",
+    "    for name in plate_names if not name=='Plate_1' and not name=='Plate_2' # Do not include pilot datasets\n",
+    "}\n",
+    "\n",
+    "# view the dictionary to assess that all info is added correctly\n",
+    "pprint.pprint(plate_info_dictionary, indent=4)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Run QC pipeline in CellProfiler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cp_parallel.run_cellprofiler_parallel(\n",
+    "    plate_info_dictionary=plate_info_dictionary, run_name=run_name\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "cfret_data",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}