WayScience · MikeLippincott · Nov 15, 2024 · Nov 22, 2024 · Nov 22, 2024
diff --git a/.gitignore b/.gitignore
@@ -160,3 +160,17 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+
+# data folder
+data/*
+Wave1_data/2.illumination_correction/illum_directory/*
+Wave1_data/2.illumination_correction/logs/*
+Wave1_data/3.cellprofiling/analysis_output/*
+Wave1_data/4.processing_profiled_features/notebooks/runinfo/*
+Wave1_data/4.processing_profiled_features/scripts/runinfo/*
+Wave1_data/4.processing_profiled_features/data/*
+# slurm logs
+*.out
+*.err
+*.log
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,49 @@
+repos:
+-   repo: https://gitlab.com/vojko.pribudic.foss/pre-commit-update
+    rev: v0.6.0post1  # Insert the latest tag here
+    hooks:
+    -   id: pre-commit-update
+        args: [--exclude, black, --keep, isort]
+  # Formats import order
+-   repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+    -   id: isort
+        name: isort (python)
+        args: ["--profile", "black", "--filter-files"]
+
+  # Code formatter for both Python files and Jupyter notebooks
+-   repo: https://github.com/psf/black
+    rev: 22.10.0
+    hooks:
+    -   id: black-jupyter
+    -   id: black
+        language_version: python3.10
+
+-   repo: https://github.com/nbQA-dev/nbQA
+    rev: 1.9.1
+    hooks:
+    -   id: nbqa-isort
+        additional_dependencies: [isort==5.6.4]
+        args: [--profile=black]
+
+
+  # remove unused imports
+-   repo: https://github.com/hadialqattan/pycln.git
+    rev: v2.4.0
+    hooks:
+    -   id: pycln
+
+  # additional hooks found within the pre-commit lib
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+    -   id: trailing-whitespace # removes trailing white spaces
+    -   id: mixed-line-ending # removes mixed end of line
+        args:
+        -   --fix=lf
+    -   id: pretty-format-json # JSON Formatter
+        args:
+        -   --autofix
+        -   --indent=4
+        -   --no-sort-keys
diff --git a/Wave1_data/3.cellprofiling/.DS_Store b/Wave1_data/3.cellprofiling/.DS_Store
diff --git a/Wave1_data/3.cellprofiling/notebooks/run_cellprofiler_analysis.ipynb b/Wave1_data/3.cellprofiling/notebooks/run_cellprofiler_analysis.ipynb
@@ -0,0 +1,197 @@
+{
+    "cells": [
+        {
+            "attachments": {},
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "# Perform segmentation and feature extraction for each plate using CellProfiler Parallel"
+            ]
+        },
+        {
+            "attachments": {},
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "## Import libraries"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": 1,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import pathlib\n",
+                "import pprint\n",
+                "import sys\n",
+                "import time\n",
+                "\n",
+                "sys.path.append(\"../../../utils/\")\n",
+                "import cp_parallel"
+            ]
+        },
+        {
+            "attachments": {},
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "## Set paths and variables"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": 2,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# set the run type for the parallelization\n",
+                "run_name = \"analysis\"\n",
+                "\n",
+                "# set main output dir for all plates\n",
+                "output_dir = pathlib.Path(\"../analysis_output\")\n",
+                "output_dir.mkdir(exist_ok=True, parents=True)\n",
+                "\n",
+                "# directory where images are located within folders\n",
+                "images_dir = pathlib.Path(\"../../2.illumination_correction/illum_directory/\")\n",
+                "\n",
+                "# path to plugins directory as one of the pipelines uses the RunCellpose plugin\n",
+                "plugins_dir = pathlib.Path(\n",
+                "    \"/home/lippincm/Documents/CellProfiler-plugins/active_plugins\"\n",
+                ")\n",
+                "path_to_pipeline = pathlib.Path(\"../pipelines/analysis_5ch.cppipe\").resolve(strict=True)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "## Create dictionary with all info for each well"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": 3,
+            "metadata": {},
+            "outputs": [
+                {
+                    "name": "stdout",
+                    "output_type": "stream",
+                    "text": [
+                        "Number of directories to process:  3\n"
+                    ]
+                }
+            ],
+            "source": [
+                "# collect every sub-directory of images_dir that directly contains .tiff images\n",
+                "raw_directories = [\n",
+                "    candidate\n",
+                "    for candidate in images_dir.rglob(\"*\")\n",
+                "    if candidate.is_dir() and any(candidate.glob(\"*.tiff\"))\n",
+                "]\n",
+                "# uncomment to limit the run to two directories while testing\n",
+                "# raw_directories = raw_directories[:2]\n",
+                "\n",
+                "# map each directory name to the paths handed to cp_parallel (stored as strings)\n",
+                "dict_of_runs = {}\n",
+                "for plate_dir in raw_directories:\n",
+                "    dict_of_runs[plate_dir.name] = {\n",
+                "        \"path_to_images\": str(plate_dir),\n",
+                "        \"path_to_output\": str(output_dir / plate_dir.name),\n",
+                "        \"path_to_pipeline\": str(path_to_pipeline),\n",
+                "    }\n",
+                "print(\"Number of directories to process: \", len(dict_of_runs))"
+            ]
+        },
+        {
+            "attachments": {},
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "## Run analysis pipeline on each plate in parallel\n",
+                "\n",
+                "This cell is not run to completion here because of how long it would take. It is run from the Python script instead."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": 4,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "start = time.time()"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": 5,
+            "metadata": {},
+            "outputs": [
+                {
+                    "name": "stdout",
+                    "output_type": "stream",
+                    "text": [
+                        "Number of processes: 3\n",
+                        "All processes have been completed!\n",
+                        "0\n",
+                        "0\n",
+                        "0\n",
+                        "All results have been converted to log files!\n"
+                    ]
+                }
+            ],
+            "source": [
+                "cp_parallel.run_cellprofiler_parallel(\n",
+                "    plate_info_dictionary=dict_of_runs,\n",
+                "    run_name=run_name,\n",
+                "    # plugins_dir=plugins_dir,\n",
+                ")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": 6,
+            "metadata": {},
+            "outputs": [
+                {
+                    "name": "stdout",
+                    "output_type": "stream",
+                    "text": [
+                        "Total time taken: 00:02:29.75\n"
+                    ]
+                }
+            ],
+            "source": [
+                "end = time.time()\n",
+                "# format the time taken into hours, minutes, seconds\n",
+                "hours, rem = divmod(end - start, 3600)\n",
+                "minutes, seconds = divmod(rem, 60)\n",
+                "print(\n",
+                "    \"Total time taken: {:0>2}:{:0>2}:{:05.2f}\".format(int(hours), int(minutes), seconds)\n",
+                ")"
+            ]
+        }
+    ],
+    "metadata": {
+        "kernelspec": {
+            "display_name": "cellprofiler_timelapse_env",
+            "language": "python",
+            "name": "python3"
+        },
+        "language_info": {
+            "codemirror_mode": {
+                "name": "ipython",
+                "version": 3
+            },
+            "file_extension": ".py",
+            "mimetype": "text/x-python",
+            "name": "python",
+            "nbconvert_exporter": "python",
+            "pygments_lexer": "ipython3",
+            "version": "3.8.20"
+        },
+        "orig_nbformat": 4
+    },
+    "nbformat": 4,
+    "nbformat_minor": 2
+}
diff --git a/Wave1_data/3.cellprofiling/perform_cellprofiling.sh b/Wave1_data/3.cellprofiling/perform_cellprofiling.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=50
+#SBATCH --partition=amilan
+#SBATCH --qos=normal
+#SBATCH --account=amc-general
+#SBATCH --time=8:00:00
+#SBATCH --output=../cp-%j.out
+
+# Convert the notebooks to scripts and run the CellProfiler analysis as a SLURM job.
+# 50 cores at 3.75 GB of RAM per core puts us under the max RAM for this node :D
+
+# activate the cellprofiler environment
+module load anaconda
+# `conda init` only edits the shell rc file; load the hook so `conda activate` works in this shell
+eval "$(conda shell.bash hook)"
+conda activate cellprofiler_timelapse_env || exit
+
+# stop on any failure so a stale script is never run and "done" is never reported for a failed job
+jupyter nbconvert --to=script --FilesWriter.build_directory=scripts/ notebooks/*.ipynb || exit
+
+cd scripts/ || exit
+python run_cellprofiler_analysis.py || exit
+cd .. || exit
+
+# deactivate the cellprofiler environment
+conda deactivate
+
+echo "Cellprofiler analysis done"