From b128fe26f144029ced48efc0685da54fa35aa5a6 Mon Sep 17 00:00:00 2001 From: Cameron Mattson Date: Mon, 25 Nov 2024 15:01:15 -0700 Subject: [PATCH 1/2] Updated to include feature for dropping any and all columns. Fixed bug tracking groups in PairwiseCompare. Simplified the Pearsons Correlation module. --- docs/example_pairwise_comparisons.ipynb | 310 ++++++++++++------------ src/comparators/PearsonsCorrelation.py | 11 +- src/comparison_tools/PairwiseCompare.py | 152 ++++++++---- 3 files changed, 267 insertions(+), 206 deletions(-) diff --git a/docs/example_pairwise_comparisons.ipynb b/docs/example_pairwise_comparisons.ipynb index a35af50..4cd1a47 100644 --- a/docs/example_pairwise_comparisons.ipynb +++ b/docs/example_pairwise_comparisons.ipynb @@ -16,10 +16,10 @@ "execution_count": 1, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:08:48.308492Z", - "iopub.status.busy": "2024-11-21T17:08:48.308382Z", - "iopub.status.idle": "2024-11-21T17:08:48.503263Z", - "shell.execute_reply": "2024-11-21T17:08:48.502921Z" + "iopub.execute_input": "2024-11-25T21:58:40.882767Z", + "iopub.status.busy": "2024-11-25T21:58:40.882544Z", + "iopub.status.idle": "2024-11-25T21:58:41.115188Z", + "shell.execute_reply": "2024-11-25T21:58:41.114809Z" }, "jukit_cell_id": "JlHVCFc6jg" }, @@ -45,10 +45,10 @@ "execution_count": 2, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:08:48.504943Z", - "iopub.status.busy": "2024-11-21T17:08:48.504749Z", - "iopub.status.idle": "2024-11-21T17:08:48.507703Z", - "shell.execute_reply": "2024-11-21T17:08:48.507435Z" + "iopub.execute_input": "2024-11-25T21:58:41.116890Z", + "iopub.status.busy": "2024-11-25T21:58:41.116700Z", + "iopub.status.idle": "2024-11-25T21:58:41.119424Z", + "shell.execute_reply": "2024-11-25T21:58:41.119123Z" }, "jukit_cell_id": "KnQdvFxPkd" }, @@ -72,10 +72,10 @@ "execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:08:48.508823Z", - "iopub.status.busy": "2024-11-21T17:08:48.508707Z", - "iopub.status.idle": "2024-11-21T17:08:48.562915Z", - "shell.execute_reply": "2024-11-21T17:08:48.562585Z" + "iopub.execute_input": "2024-11-25T21:58:41.120529Z", + "iopub.status.busy": "2024-11-25T21:58:41.120413Z", + "iopub.status.idle": "2024-11-25T21:58:41.177428Z", + "shell.execute_reply": "2024-11-25T21:58:41.177064Z" }, "jukit_cell_id": "dlU1p2c9cE" }, @@ -102,10 +102,10 @@ "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:08:48.564275Z", - "iopub.status.busy": "2024-11-21T17:08:48.564115Z", - "iopub.status.idle": "2024-11-21T17:08:48.575822Z", - "shell.execute_reply": "2024-11-21T17:08:48.575542Z" + "iopub.execute_input": "2024-11-25T21:58:41.178861Z", + "iopub.status.busy": "2024-11-25T21:58:41.178700Z", + "iopub.status.idle": "2024-11-25T21:58:41.191568Z", + "shell.execute_reply": "2024-11-25T21:58:41.191279Z" }, "jukit_cell_id": "gQ5lHlQ04X" }, @@ -393,10 +393,10 @@ "execution_count": 5, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:08:48.605253Z", - "iopub.status.busy": "2024-11-21T17:08:48.604917Z", - "iopub.status.idle": "2024-11-21T17:08:48.607235Z", - "shell.execute_reply": "2024-11-21T17:08:48.606956Z" + "iopub.execute_input": "2024-11-25T21:58:41.216616Z", + "iopub.status.busy": "2024-11-25T21:58:41.216314Z", + "iopub.status.idle": "2024-11-25T21:58:41.218695Z", + "shell.execute_reply": "2024-11-25T21:58:41.218413Z" }, "jukit_cell_id": "ybX13ZBK8Q" }, @@ -443,10 +443,10 @@ "execution_count": 6, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:08:48.608235Z", - "iopub.status.busy": "2024-11-21T17:08:48.608136Z", - "iopub.status.idle": "2024-11-21T17:09:10.774706Z", - "shell.execute_reply": "2024-11-21T17:09:10.774353Z" + "iopub.execute_input": "2024-11-25T21:58:41.220041Z", + "iopub.status.busy": "2024-11-25T21:58:41.219819Z", + "iopub.status.idle": "2024-11-25T21:59:06.064614Z", + "shell.execute_reply": "2024-11-25T21:59:06.064173Z" }, "jukit_cell_id": "CRa8vLBRj8" }, @@ -469,10 +469,10 @@ "execution_count": 7, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:09:10.776378Z", - "iopub.status.busy": "2024-11-21T17:09:10.776248Z", - "iopub.status.idle": "2024-11-21T17:09:10.781129Z", - "shell.execute_reply": "2024-11-21T17:09:10.780827Z" + "iopub.execute_input": "2024-11-25T21:59:06.066241Z", + "iopub.status.busy": "2024-11-25T21:59:06.066107Z", + "iopub.status.idle": "2024-11-25T21:59:06.071362Z", + "shell.execute_reply": "2024-11-25T21:59:06.071030Z" }, "jukit_cell_id": "cGLOXfXNrN" }, @@ -587,10 +587,10 @@ "execution_count": 8, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:09:10.782298Z", - "iopub.status.busy": "2024-11-21T17:09:10.782186Z", - "iopub.status.idle": "2024-11-21T17:09:10.784313Z", - "shell.execute_reply": "2024-11-21T17:09:10.784023Z" + "iopub.execute_input": "2024-11-25T21:59:06.072907Z", + "iopub.status.busy": "2024-11-25T21:59:06.072623Z", + "iopub.status.idle": "2024-11-25T21:59:06.074752Z", + "shell.execute_reply": "2024-11-25T21:59:06.074457Z" }, "jukit_cell_id": "qzMvL0c8sb" }, @@ -614,10 +614,10 @@ "execution_count": 9, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:09:10.785407Z", - "iopub.status.busy": "2024-11-21T17:09:10.785297Z", - "iopub.status.idle": "2024-11-21T17:09:10.787247Z", - "shell.execute_reply": "2024-11-21T17:09:10.786952Z" + "iopub.execute_input": "2024-11-25T21:59:06.075894Z", + "iopub.status.busy": "2024-11-25T21:59:06.075721Z", + "iopub.status.idle": "2024-11-25T21:59:06.077633Z", + "shell.execute_reply": "2024-11-25T21:59:06.077342Z" }, "jukit_cell_id": "7modF1v2ll" }, @@ -649,10 +649,10 @@ "execution_count": 10, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:09:10.788463Z", - "iopub.status.busy": "2024-11-21T17:09:10.788240Z", - "iopub.status.idle": "2024-11-21T17:09:12.846748Z", - "shell.execute_reply": "2024-11-21T17:09:12.846351Z" + "iopub.execute_input": "2024-11-25T21:59:06.078770Z", + "iopub.status.busy": "2024-11-25T21:59:06.078592Z", + "iopub.status.idle": "2024-11-25T21:59:08.390094Z", + "shell.execute_reply": "2024-11-25T21:59:08.389654Z" }, "jukit_cell_id": "JPLwsPbVsU" }, @@ -676,10 +676,10 @@ "execution_count": 11, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:09:12.848417Z", - "iopub.status.busy": "2024-11-21T17:09:12.848289Z", - "iopub.status.idle": "2024-11-21T17:09:12.854232Z", - "shell.execute_reply": "2024-11-21T17:09:12.853940Z" + "iopub.execute_input": "2024-11-25T21:59:08.391840Z", + "iopub.status.busy": "2024-11-25T21:59:08.391712Z", + "iopub.status.idle": "2024-11-25T21:59:08.397305Z", + "shell.execute_reply": "2024-11-25T21:59:08.397005Z" }, "jukit_cell_id": "lFR01EXVLC" }, @@ -708,95 +708,95 @@ " pearsons_correlation\n", " Metadata_Concentration__antehoc_group0\n", " Metadata_Concentration__antehoc_group1\n", - " Metadata_siRNA__posthoc_group0\n", - " Metadata_siRNA__posthoc_group1\n", " Metadata_Well__posthoc_group0\n", " Metadata_Well__posthoc_group1\n", + " Metadata_siRNA__posthoc_group0\n", + " Metadata_siRNA__posthoc_group1\n", " \n", " \n", " \n", " \n", " 0\n", - " 0.128001\n", - " (0.001,)\n", - " (0.001,)\n", - " NF1 Target 1\n", - " NF1 Target 2\n", + " -0.258292\n", + " 0.001\n", + " 0.001\n", + " E10\n", " F10\n", - " G10\n", + " Scramble\n", + " NF1 Target 1\n", " \n", " \n", " 1\n", - " 0.190290\n", - " (0.001,)\n", - " (0.001,)\n", + " -0.011791\n", + " 0.001\n", + " 0.001\n", + " E10\n", + " F4\n", + " Scramble\n", " NF1 Target 1\n", - " NF1 Target 2\n", - " F10\n", - " G4\n", " \n", " \n", " 2\n", - " 0.173737\n", - " (0.001,)\n", - " (0.001,)\n", + " 0.188021\n", + " 0.001\n", + " 0.001\n", + " E10\n", + " F7\n", + " Scramble\n", " NF1 Target 1\n", - " NF1 Target 2\n", - " F10\n", - " G7\n", " \n", " \n", " 3\n", - " -0.258292\n", - " (0.001,)\n", - " (0.001,)\n", - " NF1 Target 1\n", - " Scramble\n", - " F10\n", + " -0.045444\n", + " 0.001\n", + " 0.001\n", " E10\n", + " G10\n", + " Scramble\n", + " NF1 Target 2\n", " \n", " \n", " 4\n", - " 0.089742\n", - " (0.001,)\n", - " (0.001,)\n", - " NF1 Target 1\n", + " -0.081625\n", + " 0.001\n", + " 0.001\n", + " E10\n", + " G4\n", " Scramble\n", - " F10\n", - " E4\n", + " NF1 Target 2\n", " \n", " \n", "\n", "" ], "text/plain": [ - " pearsons_correlation Metadata_Concentration__antehoc_group0 \\\n", - "0 0.128001 (0.001,) \n", - "1 0.190290 (0.001,) \n", - "2 0.173737 (0.001,) \n", - "3 -0.258292 (0.001,) \n", - "4 0.089742 (0.001,) \n", + " pearsons_correlation Metadata_Concentration__antehoc_group0 \\\n", + "0 -0.258292 0.001 \n", + "1 -0.011791 0.001 \n", + "2 0.188021 0.001 \n", + "3 -0.045444 0.001 \n", + "4 -0.081625 0.001 \n", "\n", - " Metadata_Concentration__antehoc_group1 Metadata_siRNA__posthoc_group0 \\\n", - "0 (0.001,) NF1 Target 1 \n", - "1 (0.001,) NF1 Target 1 \n", - "2 (0.001,) NF1 Target 1 \n", - "3 (0.001,) NF1 Target 1 \n", - "4 (0.001,) NF1 Target 1 \n", + " Metadata_Concentration__antehoc_group1 Metadata_Well__posthoc_group0 \\\n", + "0 0.001 E10 \n", + "1 0.001 E10 \n", + "2 0.001 E10 \n", + "3 0.001 E10 \n", + "4 0.001 E10 \n", "\n", - " Metadata_siRNA__posthoc_group1 Metadata_Well__posthoc_group0 \\\n", - "0 NF1 Target 2 F10 \n", - "1 NF1 Target 2 F10 \n", - "2 NF1 Target 2 F10 \n", - "3 Scramble F10 \n", - "4 Scramble F10 \n", + " Metadata_Well__posthoc_group1 Metadata_siRNA__posthoc_group0 \\\n", + "0 F10 Scramble \n", + "1 F4 Scramble \n", + "2 F7 Scramble \n", + "3 G10 Scramble \n", + "4 G4 Scramble \n", "\n", - " Metadata_Well__posthoc_group1 \n", - "0 G10 \n", - "1 G4 \n", - "2 G7 \n", - "3 E10 \n", - "4 E4 " + " Metadata_siRNA__posthoc_group1 \n", + "0 NF1 Target 1 \n", + "1 NF1 Target 1 \n", + "2 NF1 Target 1 \n", + "3 NF1 Target 2 \n", + "4 NF1 Target 2 " ] }, "execution_count": 11, @@ -813,10 +813,10 @@ "execution_count": 12, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:09:12.855400Z", - "iopub.status.busy": "2024-11-21T17:09:12.855290Z", - "iopub.status.idle": "2024-11-21T17:09:12.857375Z", - "shell.execute_reply": "2024-11-21T17:09:12.857086Z" + "iopub.execute_input": "2024-11-25T21:59:08.398582Z", + "iopub.status.busy": "2024-11-25T21:59:08.398436Z", + "iopub.status.idle": "2024-11-25T21:59:08.400464Z", + "shell.execute_reply": "2024-11-25T21:59:08.400163Z" }, "jukit_cell_id": "EiRceI8zqd" }, @@ -840,10 +840,10 @@ "execution_count": 13, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:09:12.858449Z", - "iopub.status.busy": "2024-11-21T17:09:12.858346Z", - "iopub.status.idle": "2024-11-21T17:09:12.860296Z", - "shell.execute_reply": "2024-11-21T17:09:12.860027Z" + "iopub.execute_input": "2024-11-25T21:59:08.401706Z", + "iopub.status.busy": "2024-11-25T21:59:08.401531Z", + "iopub.status.idle": "2024-11-25T21:59:08.403487Z", + "shell.execute_reply": "2024-11-25T21:59:08.403195Z" }, "jukit_cell_id": "uHLGwABthL" }, @@ -853,7 +853,7 @@ "output_type": "stream", "text": [ "Output Dataframe Columns:\n", - "['pearsons_correlation', 'Metadata_Concentration__antehoc_group0', 'Metadata_Concentration__antehoc_group1', 'Metadata_siRNA__posthoc_group0', 'Metadata_siRNA__posthoc_group1', 'Metadata_Well__posthoc_group0', 'Metadata_Well__posthoc_group1']\n" + "['pearsons_correlation', 'Metadata_Concentration__antehoc_group0', 'Metadata_Concentration__antehoc_group1', 'Metadata_Well__posthoc_group0', 'Metadata_Well__posthoc_group1', 'Metadata_siRNA__posthoc_group0', 'Metadata_siRNA__posthoc_group1']\n" ] } ], @@ -876,10 +876,10 @@ "execution_count": 14, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:09:12.861359Z", - "iopub.status.busy": "2024-11-21T17:09:12.861253Z", - "iopub.status.idle": "2024-11-21T17:09:14.911473Z", - "shell.execute_reply": "2024-11-21T17:09:14.911067Z" + "iopub.execute_input": "2024-11-25T21:59:08.404737Z", + "iopub.status.busy": "2024-11-25T21:59:08.404562Z", + "iopub.status.idle": "2024-11-25T21:59:10.713169Z", + "shell.execute_reply": "2024-11-25T21:59:10.712729Z" }, "jukit_cell_id": "OHRc2Dyid6" }, @@ -904,10 +904,10 @@ "execution_count": 15, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:09:14.913207Z", - "iopub.status.busy": "2024-11-21T17:09:14.913050Z", - "iopub.status.idle": "2024-11-21T17:09:14.918322Z", - "shell.execute_reply": "2024-11-21T17:09:14.918027Z" + "iopub.execute_input": "2024-11-25T21:59:10.715026Z", + "iopub.status.busy": "2024-11-25T21:59:10.714901Z", + "iopub.status.idle": "2024-11-25T21:59:10.719300Z", + "shell.execute_reply": "2024-11-25T21:59:10.719004Z" }, "jukit_cell_id": "rGAPBpItug" }, @@ -934,59 +934,59 @@ " \n", " \n", " pearsons_correlation\n", - " Metadata_siRNA__posthoc_group0\n", - " Metadata_siRNA__posthoc_group1\n", + " Metadata_Well__posthoc_group0\n", + " Metadata_Well__posthoc_group1\n", " \n", " \n", " \n", " \n", " 0\n", - " 0.128001\n", - " (NF1 Target 1, F10)\n", - " (NF1 Target 2, G10)\n", + " -0.258292\n", + " E10\n", + " F10\n", " \n", " \n", " 1\n", - " 0.190290\n", - " (NF1 Target 1, F10)\n", - " (NF1 Target 2, G4)\n", + " -0.011791\n", + " E10\n", + " F4\n", " \n", " \n", " 2\n", - " 0.173737\n", - " (NF1 Target 1, F10)\n", - " (NF1 Target 2, G7)\n", + " 0.188021\n", + " E10\n", + " F7\n", " \n", " \n", " 3\n", - " -0.258292\n", - " (NF1 Target 1, F10)\n", - " (Scramble, E10)\n", + " -0.045444\n", + " E10\n", + " G10\n", " \n", " \n", " 4\n", - " 0.089742\n", - " (NF1 Target 1, F10)\n", - " (Scramble, E4)\n", + " -0.081625\n", + " E10\n", + " G4\n", " \n", " \n", "\n", "" ], "text/plain": [ - " pearsons_correlation Metadata_siRNA__posthoc_group0 \\\n", - "0 0.128001 (NF1 Target 1, F10) \n", - "1 0.190290 (NF1 Target 1, F10) \n", - "2 0.173737 (NF1 Target 1, F10) \n", - "3 -0.258292 (NF1 Target 1, F10) \n", - "4 0.089742 (NF1 Target 1, F10) \n", + " pearsons_correlation Metadata_Well__posthoc_group0 \\\n", + "0 -0.258292 E10 \n", + "1 -0.011791 E10 \n", + "2 0.188021 E10 \n", + "3 -0.045444 E10 \n", + "4 -0.081625 E10 \n", "\n", - " Metadata_siRNA__posthoc_group1 \n", - "0 (NF1 Target 2, G10) \n", - "1 (NF1 Target 2, G4) \n", - "2 (NF1 Target 2, G7) \n", - "3 (Scramble, E10) \n", - "4 (Scramble, E4) " + " Metadata_Well__posthoc_group1 \n", + "0 F10 \n", + "1 F4 \n", + "2 F7 \n", + "3 G10 \n", + "4 G4 " ] }, "execution_count": 15, @@ -1003,10 +1003,10 @@ "execution_count": 16, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:09:14.919518Z", - "iopub.status.busy": "2024-11-21T17:09:14.919410Z", - "iopub.status.idle": "2024-11-21T17:09:14.921455Z", - "shell.execute_reply": "2024-11-21T17:09:14.921171Z" + "iopub.execute_input": "2024-11-25T21:59:10.720550Z", + "iopub.status.busy": "2024-11-25T21:59:10.720440Z", + "iopub.status.idle": "2024-11-25T21:59:10.722463Z", + "shell.execute_reply": "2024-11-25T21:59:10.722183Z" }, "jukit_cell_id": "zZHKZyRQcw" }, @@ -1030,10 +1030,10 @@ "execution_count": 17, "metadata": { "execution": { - "iopub.execute_input": "2024-11-21T17:09:14.922522Z", - "iopub.status.busy": "2024-11-21T17:09:14.922421Z", - "iopub.status.idle": "2024-11-21T17:09:14.924592Z", - "shell.execute_reply": "2024-11-21T17:09:14.924252Z" + "iopub.execute_input": "2024-11-25T21:59:10.723609Z", + "iopub.status.busy": "2024-11-25T21:59:10.723501Z", + "iopub.status.idle": "2024-11-25T21:59:10.725463Z", + "shell.execute_reply": "2024-11-25T21:59:10.725192Z" }, "jukit_cell_id": "8G7ZiPRvd8" }, @@ -1043,7 +1043,7 @@ "output_type": "stream", "text": [ "Output Dataframe Columns:\n", - "['pearsons_correlation', 'Metadata_siRNA__posthoc_group0', 'Metadata_siRNA__posthoc_group1']\n" + "['pearsons_correlation', 'Metadata_Well__posthoc_group0', 'Metadata_Well__posthoc_group1']\n" ] } ], diff --git a/src/comparators/PearsonsCorrelation.py b/src/comparators/PearsonsCorrelation.py index 58e5a42..4d1bc8d 100644 --- a/src/comparators/PearsonsCorrelation.py +++ b/src/comparators/PearsonsCorrelation.py @@ -25,14 +25,9 @@ def save_groups(self, _group_cols: list[str], **_groups: dict[str, pd.DataFrame] for idx, col in enumerate(_group_cols): for group_name, group in _groups.items(): - if len(_group_cols) > 1: - self._comparisons[f"{col}__{group_name}"].extend( - [group[idx]] * comparison_count - ) - else: - self._comparisons[f"{col}__{group_name}"].extend( - [group] * comparison_count - ) + self._comparisons[f"{col}__{group_name}"].extend( + [group[idx]] * comparison_count + ) @property def comparisons(self): diff --git a/src/comparison_tools/PairwiseCompare.py b/src/comparison_tools/PairwiseCompare.py index 11a9eba..540c59c 100644 --- a/src/comparison_tools/PairwiseCompare.py +++ b/src/comparison_tools/PairwiseCompare.py @@ -1,9 +1,10 @@ import warnings from collections.abc import Iterable from itertools import combinations, product -from typing import Optional +from typing import Any, Optional, Union import pandas as pd + from comparators.Comparator import Comparator @@ -98,12 +99,12 @@ def __init__( self.__antehoc_group_cols = _antehoc_group_cols self.__posthoc_group_cols = _posthoc_group_cols - self.__filtered_antehoc_group_cols = self.__get_group_fields( - _group_cols=self.__antehoc_group_cols, + self.__filtered_antehoc_col_idx = self.__get_group_column_idxs( + _group_columns=self.__antehoc_group_cols, ) - self.__filtered_posthoc_group_cols = self.__get_group_fields( - _group_cols=self.__posthoc_group_cols, + self.__filtered_posthoc_col_idx = self.__get_group_column_idxs( + _group_columns=self.__posthoc_group_cols, ) def __warn_empty_comparisons(self, _comparison_type_name): @@ -124,13 +125,26 @@ def __is_iterable_with_strings(self, _data_structure): if any(not isinstance(element, str) for element in _data_structure): raise TypeError(f"{prefix_msg} Data in Iterable is not of type String.") - def __get_group_fields(self, _group_cols): + def __get_group_column_idxs(self, _group_columns): """Get group fields after removing dropped columns.""" return [ - group_col for group_col in _group_cols if group_col not in self.__drop_cols + col_idx + for col_idx, group_col in enumerate(_group_columns) + if group_col in self.__drop_cols ] + def __get_group_column_element( + self, _group_column_data: tuple[str], _group_column_idxs: list + ): + """Get the corresponding group column element from the index""" + + if _group_column_idxs: + return tuple(_group_column_data[idx] for idx in _group_column_idxs) + + else: + return _group_column_data + def __contains_match(self, _groups): """Check if the same features between both groups are the same value.""" @@ -165,13 +179,11 @@ def inter_comparisons(self): if self.__contains_match(apair): continue - apair0 = apair[0] - apair1 = apair[1] + apair = tuple( + [(item,) if not isinstance(item, tuple) else item for item in apair[:2]] + ) - # Avoids a future deprecation in the pandas get_group method - if not isinstance(apair0, tuple) or not isinstance(apair1, tuple): - apair0 = (apair0,) - apair1 = (apair1,) + apair0, apair1 = apair # Extract the keys for the first post hoc group group0df = groupdf.get_group(apair0).copy() @@ -189,33 +201,59 @@ def inter_comparisons(self): self.__warn_empty_comparisons(_comparison_type_name="Inter Comparisons") continue + if len(self.__filtered_antehoc_col_idx) < len(self.__antehoc_group_cols): + filtered_apair = tuple( + self.__get_group_column_element( + apair[group_idx], self.__filtered_antehoc_col_idx + ) + for group_idx in range(2) + ) + # Iterate through each well group cartesian product and save the data for ppair in comparison_key_product: if self.__contains_match(ppair): continue - ppair0 = ppair[0] - ppair1 = ppair[1] + ppair = tuple( + [ + (item,) if not isinstance(item, tuple) else item + for item in ppair[:2] + ] + ) - # Avoids a future deprecation in the pandas get_group method - if not isinstance(ppair0, tuple) or not isinstance(ppair1, tuple): - ppair0 = (ppair0,) - ppair1 = (ppair1,) + ppair0, ppair1 = ppair self.__comparator( group0df.get_group(ppair0), group1df.get_group(ppair1) ) - self.__comparator.save_groups( - self.__antehoc_group_cols, - **dict(zip(self.__antehoc_group_names, apair)), - ) - - self.__comparator.save_groups( - self.__posthoc_group_cols, - **dict(zip(self.__posthoc_group_names, ppair)), - ) + if len(self.__filtered_antehoc_col_idx) < len( + self.__antehoc_group_cols + ): + self.__comparator.save_groups( + self.__get_group_column_element( + self.__antehoc_group_cols, self.__filtered_antehoc_col_idx + ), + **dict(zip(self.__antehoc_group_names, filtered_apair)), + ) + + if len(self.__filtered_posthoc_col_idx) < len( + self.__posthoc_group_cols + ): + filtered_ppair = tuple( + self.__get_group_column_element( + ppair[group_idx], self.__filtered_posthoc_col_idx + ) + for group_idx in range(2) + ) + + self.__comparator.save_groups( + self.__get_group_column_element( + self.__posthoc_group_cols, self.__filtered_posthoc_col_idx + ), + **dict(zip(self.__posthoc_group_names, filtered_ppair)), + ) def intra_comparisons(self): """ @@ -246,28 +284,56 @@ def intra_comparisons(self): self.__warn_empty_comparisons(_comparison_type_name="Intra Comparisons") continue + if len(self.__filtered_antehoc_col_idx) < len(self.__antehoc_group_cols): + filtered_agroup = self.__get_group_column_element( + agroup, self.__filtered_antehoc_col_idx + ) + # Iterate through the combinations pairs of the groups for ppair in comparison_key_combinations: if self.__contains_match(ppair): continue - ppair0 = ppair[0] - ppair1 = ppair[1] + ppair = tuple( + [ + (item,) if not isinstance(item, tuple) else item + for item in ppair[:2] + ] + ) - # Avoids a future deprecation in the pandas get_group method - if not isinstance(ppair0, tuple) or not isinstance(ppair1, tuple): - ppair0 = (ppair0,) - ppair1 = (ppair1,) + ppair0, ppair1 = ppair self.__comparator(group.get_group(ppair0), group.get_group(ppair1)) - self.__comparator.save_groups( - self.__filtered_antehoc_group_cols, - **dict(zip(self.__antehoc_group_names, (agroup, agroup))), - ) - - self.__comparator.save_groups( - self.__filtered_posthoc_group_cols, - **dict(zip(self.__posthoc_group_names, ppair)), - ) + if len(self.__filtered_antehoc_col_idx) < len( + self.__antehoc_group_cols + ): + self.__comparator.save_groups( + self.__get_group_column_element( + self.__antehoc_group_cols, self.__filtered_antehoc_col_idx + ), + **dict( + zip( + self.__antehoc_group_names, + (filtered_agroup, filtered_agroup), + ) + ), + ) + + if len(self.__filtered_posthoc_col_idx) < len( + self.__posthoc_group_cols + ): + filtered_ppair = tuple( + self.__get_group_column_element( + ppair[group_idx], self.__filtered_posthoc_col_idx + ) + for group_idx in range(2) + ) + + self.__comparator.save_groups( + self.__get_group_column_element( + self.__posthoc_group_cols, self.__filtered_posthoc_col_idx + ), + **dict(zip(self.__posthoc_group_names, filtered_ppair)), + ) From e46a3784d37fb2995b36ca33cf28ca5682921947 Mon Sep 17 00:00:00 2001 From: Cameron Mattson Date: Mon, 2 Dec 2024 12:58:30 -0700 Subject: [PATCH 2/2] Updated type hint based on pr review suggestion --- docs/example_pairwise_comparisons.ipynb | 274 ++++++++++++------------ src/comparators/PearsonsCorrelation.py | 4 +- 2 files changed, 139 insertions(+), 139 deletions(-) diff --git a/docs/example_pairwise_comparisons.ipynb b/docs/example_pairwise_comparisons.ipynb index 4cd1a47..8f4ab69 100644 --- a/docs/example_pairwise_comparisons.ipynb +++ b/docs/example_pairwise_comparisons.ipynb @@ -16,10 +16,10 @@ "execution_count": 1, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:58:40.882767Z", - "iopub.status.busy": "2024-11-25T21:58:40.882544Z", - "iopub.status.idle": "2024-11-25T21:58:41.115188Z", - "shell.execute_reply": "2024-11-25T21:58:41.114809Z" + "iopub.execute_input": "2024-12-02T19:52:44.430148Z", + "iopub.status.busy": "2024-12-02T19:52:44.430015Z", + "iopub.status.idle": "2024-12-02T19:52:44.647935Z", + "shell.execute_reply": "2024-12-02T19:52:44.647555Z" }, "jukit_cell_id": "JlHVCFc6jg" }, @@ -45,10 +45,10 @@ "execution_count": 2, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:58:41.116890Z", - "iopub.status.busy": "2024-11-25T21:58:41.116700Z", - "iopub.status.idle": "2024-11-25T21:58:41.119424Z", - "shell.execute_reply": "2024-11-25T21:58:41.119123Z" + "iopub.execute_input": "2024-12-02T19:52:44.649701Z", + "iopub.status.busy": "2024-12-02T19:52:44.649507Z", + "iopub.status.idle": "2024-12-02T19:52:44.652722Z", + "shell.execute_reply": "2024-12-02T19:52:44.652420Z" }, "jukit_cell_id": "KnQdvFxPkd" }, @@ -72,10 +72,10 @@ "execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:58:41.120529Z", - "iopub.status.busy": "2024-11-25T21:58:41.120413Z", - "iopub.status.idle": "2024-11-25T21:58:41.177428Z", - "shell.execute_reply": "2024-11-25T21:58:41.177064Z" + "iopub.execute_input": "2024-12-02T19:52:44.653983Z", + "iopub.status.busy": "2024-12-02T19:52:44.653793Z", + "iopub.status.idle": "2024-12-02T19:52:44.706713Z", + "shell.execute_reply": "2024-12-02T19:52:44.706323Z" }, "jukit_cell_id": "dlU1p2c9cE" }, @@ -102,10 +102,10 @@ "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:58:41.178861Z", - "iopub.status.busy": "2024-11-25T21:58:41.178700Z", - "iopub.status.idle": "2024-11-25T21:58:41.191568Z", - "shell.execute_reply": "2024-11-25T21:58:41.191279Z" + "iopub.execute_input": "2024-12-02T19:52:44.708297Z", + "iopub.status.busy": "2024-12-02T19:52:44.708140Z", + "iopub.status.idle": "2024-12-02T19:52:44.721039Z", + "shell.execute_reply": "2024-12-02T19:52:44.720758Z" }, "jukit_cell_id": "gQ5lHlQ04X" }, @@ -393,10 +393,10 @@ "execution_count": 5, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:58:41.216616Z", - "iopub.status.busy": "2024-11-25T21:58:41.216314Z", - "iopub.status.idle": "2024-11-25T21:58:41.218695Z", - "shell.execute_reply": "2024-11-25T21:58:41.218413Z" + "iopub.execute_input": "2024-12-02T19:52:44.745963Z", + "iopub.status.busy": "2024-12-02T19:52:44.745676Z", + "iopub.status.idle": "2024-12-02T19:52:44.748082Z", + "shell.execute_reply": "2024-12-02T19:52:44.747780Z" }, "jukit_cell_id": "ybX13ZBK8Q" }, @@ -443,10 +443,10 @@ "execution_count": 6, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:58:41.220041Z", - "iopub.status.busy": "2024-11-25T21:58:41.219819Z", - "iopub.status.idle": "2024-11-25T21:59:06.064614Z", - "shell.execute_reply": "2024-11-25T21:59:06.064173Z" + "iopub.execute_input": "2024-12-02T19:52:44.749262Z", + "iopub.status.busy": "2024-12-02T19:52:44.749084Z", + "iopub.status.idle": "2024-12-02T19:53:08.806010Z", + "shell.execute_reply": "2024-12-02T19:53:08.805601Z" }, "jukit_cell_id": "CRa8vLBRj8" }, @@ -469,10 +469,10 @@ "execution_count": 7, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:59:06.066241Z", - "iopub.status.busy": "2024-11-25T21:59:06.066107Z", - "iopub.status.idle": "2024-11-25T21:59:06.071362Z", - "shell.execute_reply": "2024-11-25T21:59:06.071030Z" + "iopub.execute_input": "2024-12-02T19:53:08.807845Z", + "iopub.status.busy": "2024-12-02T19:53:08.807680Z", + "iopub.status.idle": "2024-12-02T19:53:08.812868Z", + "shell.execute_reply": "2024-12-02T19:53:08.812532Z" }, "jukit_cell_id": "cGLOXfXNrN" }, @@ -587,10 +587,10 @@ "execution_count": 8, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:59:06.072907Z", - "iopub.status.busy": "2024-11-25T21:59:06.072623Z", - "iopub.status.idle": "2024-11-25T21:59:06.074752Z", - "shell.execute_reply": "2024-11-25T21:59:06.074457Z" + "iopub.execute_input": "2024-12-02T19:53:08.814179Z", + "iopub.status.busy": "2024-12-02T19:53:08.814037Z", + "iopub.status.idle": "2024-12-02T19:53:08.816150Z", + "shell.execute_reply": "2024-12-02T19:53:08.815814Z" }, "jukit_cell_id": "qzMvL0c8sb" }, @@ -614,10 +614,10 @@ "execution_count": 9, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:59:06.075894Z", - "iopub.status.busy": "2024-11-25T21:59:06.075721Z", - "iopub.status.idle": "2024-11-25T21:59:06.077633Z", - "shell.execute_reply": "2024-11-25T21:59:06.077342Z" + "iopub.execute_input": "2024-12-02T19:53:08.817405Z", + "iopub.status.busy": "2024-12-02T19:53:08.817251Z", + "iopub.status.idle": "2024-12-02T19:53:08.819387Z", + "shell.execute_reply": "2024-12-02T19:53:08.819051Z" }, "jukit_cell_id": "7modF1v2ll" }, @@ -649,10 +649,10 @@ "execution_count": 10, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:59:06.078770Z", - "iopub.status.busy": "2024-11-25T21:59:06.078592Z", - "iopub.status.idle": "2024-11-25T21:59:08.390094Z", - "shell.execute_reply": "2024-11-25T21:59:08.389654Z" + "iopub.execute_input": "2024-12-02T19:53:08.820624Z", + "iopub.status.busy": "2024-12-02T19:53:08.820488Z", + "iopub.status.idle": "2024-12-02T19:53:11.115742Z", + "shell.execute_reply": "2024-12-02T19:53:11.115336Z" }, "jukit_cell_id": "JPLwsPbVsU" }, @@ -676,10 +676,10 @@ "execution_count": 11, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:59:08.391840Z", - "iopub.status.busy": "2024-11-25T21:59:08.391712Z", - "iopub.status.idle": "2024-11-25T21:59:08.397305Z", - "shell.execute_reply": "2024-11-25T21:59:08.397005Z" + "iopub.execute_input": "2024-12-02T19:53:11.117480Z", + "iopub.status.busy": "2024-12-02T19:53:11.117341Z", + "iopub.status.idle": "2024-12-02T19:53:11.122848Z", + "shell.execute_reply": "2024-12-02T19:53:11.122493Z" }, "jukit_cell_id": "lFR01EXVLC" }, @@ -708,62 +708,62 @@ " pearsons_correlation\n", " Metadata_Concentration__antehoc_group0\n", " Metadata_Concentration__antehoc_group1\n", - " Metadata_Well__posthoc_group0\n", - " Metadata_Well__posthoc_group1\n", " Metadata_siRNA__posthoc_group0\n", " Metadata_siRNA__posthoc_group1\n", + " Metadata_Well__posthoc_group0\n", + " Metadata_Well__posthoc_group1\n", " \n", " \n", " \n", " \n", " 0\n", - " -0.258292\n", + " 0.128001\n", " 0.001\n", " 0.001\n", - " E10\n", - " F10\n", - " Scramble\n", " NF1 Target 1\n", + " NF1 Target 2\n", + " F10\n", + " G10\n", " \n", " \n", " 1\n", - " -0.011791\n", + " 0.190290\n", " 0.001\n", " 0.001\n", - " E10\n", - " F4\n", - " Scramble\n", " NF1 Target 1\n", + " NF1 Target 2\n", + " F10\n", + " G4\n", " \n", " \n", " 2\n", - " 0.188021\n", + " 0.173737\n", " 0.001\n", " 0.001\n", - " E10\n", - " F7\n", - " Scramble\n", " NF1 Target 1\n", + " NF1 Target 2\n", + " F10\n", + " G7\n", " \n", " \n", " 3\n", - " -0.045444\n", + " -0.258292\n", " 0.001\n", " 0.001\n", - " E10\n", - " G10\n", + " NF1 Target 1\n", " Scramble\n", - " NF1 Target 2\n", + " F10\n", + " E10\n", " \n", " \n", " 4\n", - " -0.081625\n", + " 0.089742\n", " 0.001\n", " 0.001\n", - " E10\n", - " G4\n", + " NF1 Target 1\n", " Scramble\n", - " NF1 Target 2\n", + " F10\n", + " E4\n", " \n", " \n", "\n", @@ -771,32 +771,32 @@ ], "text/plain": [ " pearsons_correlation Metadata_Concentration__antehoc_group0 \\\n", - "0 -0.258292 0.001 \n", - "1 -0.011791 0.001 \n", - "2 0.188021 0.001 \n", - "3 -0.045444 0.001 \n", - "4 -0.081625 0.001 \n", + "0 0.128001 0.001 \n", + "1 0.190290 0.001 \n", + "2 0.173737 0.001 \n", + "3 -0.258292 0.001 \n", + "4 0.089742 0.001 \n", "\n", - " Metadata_Concentration__antehoc_group1 Metadata_Well__posthoc_group0 \\\n", - "0 0.001 E10 \n", - "1 0.001 E10 \n", - "2 0.001 E10 \n", - "3 0.001 E10 \n", - "4 0.001 E10 \n", + " Metadata_Concentration__antehoc_group1 Metadata_siRNA__posthoc_group0 \\\n", + "0 0.001 NF1 Target 1 \n", + "1 0.001 NF1 Target 1 \n", + "2 0.001 NF1 Target 1 \n", + "3 0.001 NF1 Target 1 \n", + "4 0.001 NF1 Target 1 \n", "\n", - " Metadata_Well__posthoc_group1 Metadata_siRNA__posthoc_group0 \\\n", - "0 F10 Scramble \n", - "1 F4 Scramble \n", - "2 F7 Scramble \n", - "3 G10 Scramble \n", - "4 G4 Scramble \n", + " Metadata_siRNA__posthoc_group1 Metadata_Well__posthoc_group0 \\\n", + "0 NF1 Target 2 F10 \n", + "1 NF1 Target 2 F10 \n", + "2 NF1 Target 2 F10 \n", + "3 Scramble F10 \n", + "4 Scramble F10 \n", "\n", - " Metadata_siRNA__posthoc_group1 \n", - "0 NF1 Target 1 \n", - "1 NF1 Target 1 \n", - "2 NF1 Target 1 \n", - "3 NF1 Target 2 \n", - "4 NF1 Target 2 " + " Metadata_Well__posthoc_group1 \n", + "0 G10 \n", + "1 G4 \n", + "2 G7 \n", + "3 E10 \n", + "4 E4 " ] }, "execution_count": 11, @@ -813,10 +813,10 @@ "execution_count": 12, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:59:08.398582Z", - "iopub.status.busy": "2024-11-25T21:59:08.398436Z", - "iopub.status.idle": "2024-11-25T21:59:08.400464Z", - "shell.execute_reply": "2024-11-25T21:59:08.400163Z" + "iopub.execute_input": "2024-12-02T19:53:11.124166Z", + "iopub.status.busy": "2024-12-02T19:53:11.124023Z", + "iopub.status.idle": "2024-12-02T19:53:11.126142Z", + "shell.execute_reply": "2024-12-02T19:53:11.125797Z" }, "jukit_cell_id": "EiRceI8zqd" }, @@ -840,10 +840,10 @@ "execution_count": 13, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:59:08.401706Z", - "iopub.status.busy": "2024-11-25T21:59:08.401531Z", - "iopub.status.idle": "2024-11-25T21:59:08.403487Z", - "shell.execute_reply": "2024-11-25T21:59:08.403195Z" + "iopub.execute_input": "2024-12-02T19:53:11.127483Z", + "iopub.status.busy": "2024-12-02T19:53:11.127297Z", + "iopub.status.idle": "2024-12-02T19:53:11.129395Z", + "shell.execute_reply": "2024-12-02T19:53:11.129049Z" }, "jukit_cell_id": "uHLGwABthL" }, @@ -853,7 +853,7 @@ "output_type": "stream", "text": [ "Output Dataframe Columns:\n", - "['pearsons_correlation', 'Metadata_Concentration__antehoc_group0', 'Metadata_Concentration__antehoc_group1', 'Metadata_Well__posthoc_group0', 'Metadata_Well__posthoc_group1', 'Metadata_siRNA__posthoc_group0', 'Metadata_siRNA__posthoc_group1']\n" + "['pearsons_correlation', 'Metadata_Concentration__antehoc_group0', 'Metadata_Concentration__antehoc_group1', 'Metadata_siRNA__posthoc_group0', 'Metadata_siRNA__posthoc_group1', 'Metadata_Well__posthoc_group0', 'Metadata_Well__posthoc_group1']\n" ] } ], @@ -876,10 +876,10 @@ "execution_count": 14, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:59:08.404737Z", - "iopub.status.busy": "2024-11-25T21:59:08.404562Z", - "iopub.status.idle": "2024-11-25T21:59:10.713169Z", - "shell.execute_reply": "2024-11-25T21:59:10.712729Z" + "iopub.execute_input": "2024-12-02T19:53:11.130698Z", + "iopub.status.busy": "2024-12-02T19:53:11.130526Z", + "iopub.status.idle": "2024-12-02T19:53:13.372280Z", + "shell.execute_reply": "2024-12-02T19:53:13.371877Z" }, "jukit_cell_id": "OHRc2Dyid6" }, @@ -904,10 +904,10 @@ "execution_count": 15, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:59:10.715026Z", - "iopub.status.busy": "2024-11-25T21:59:10.714901Z", - "iopub.status.idle": "2024-11-25T21:59:10.719300Z", - "shell.execute_reply": "2024-11-25T21:59:10.719004Z" + "iopub.execute_input": "2024-12-02T19:53:13.374236Z", + "iopub.status.busy": "2024-12-02T19:53:13.374071Z", + "iopub.status.idle": "2024-12-02T19:53:13.378578Z", + "shell.execute_reply": "2024-12-02T19:53:13.378233Z" }, "jukit_cell_id": "rGAPBpItug" }, @@ -941,33 +941,33 @@ " \n", " \n", " 0\n", - " -0.258292\n", - " E10\n", + " 0.128001\n", " F10\n", + " G10\n", " \n", " \n", " 1\n", - " -0.011791\n", - " E10\n", - " F4\n", + " 0.190290\n", + " F10\n", + " G4\n", " \n", " \n", " 2\n", - " 0.188021\n", - " E10\n", - " F7\n", + " 0.173737\n", + " F10\n", + " G7\n", " \n", " \n", " 3\n", - " -0.045444\n", + " -0.258292\n", + " F10\n", " E10\n", - " G10\n", " \n", " \n", " 4\n", - " -0.081625\n", - " E10\n", - " G4\n", + " 0.089742\n", + " F10\n", + " E4\n", " \n", " \n", "\n", @@ -975,18 +975,18 @@ ], "text/plain": [ " pearsons_correlation Metadata_Well__posthoc_group0 \\\n", - "0 -0.258292 E10 \n", - "1 -0.011791 E10 \n", - "2 0.188021 E10 \n", - "3 -0.045444 E10 \n", - "4 -0.081625 E10 \n", + "0 0.128001 F10 \n", + "1 0.190290 F10 \n", + "2 0.173737 F10 \n", + "3 -0.258292 F10 \n", + "4 0.089742 F10 \n", "\n", " Metadata_Well__posthoc_group1 \n", - "0 F10 \n", - "1 F4 \n", - "2 F7 \n", - "3 G10 \n", - "4 G4 " + "0 G10 \n", + "1 G4 \n", + "2 G7 \n", + "3 E10 \n", + "4 E4 " ] }, "execution_count": 15, @@ -1003,10 +1003,10 @@ "execution_count": 16, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:59:10.720550Z", - "iopub.status.busy": "2024-11-25T21:59:10.720440Z", - "iopub.status.idle": "2024-11-25T21:59:10.722463Z", - "shell.execute_reply": "2024-11-25T21:59:10.722183Z" + "iopub.execute_input": "2024-12-02T19:53:13.379837Z", + "iopub.status.busy": "2024-12-02T19:53:13.379697Z", + "iopub.status.idle": "2024-12-02T19:53:13.381799Z", + "shell.execute_reply": "2024-12-02T19:53:13.381472Z" }, "jukit_cell_id": "zZHKZyRQcw" }, @@ -1030,10 +1030,10 @@ "execution_count": 17, "metadata": { "execution": { - "iopub.execute_input": "2024-11-25T21:59:10.723609Z", - "iopub.status.busy": "2024-11-25T21:59:10.723501Z", - "iopub.status.idle": "2024-11-25T21:59:10.725463Z", - "shell.execute_reply": "2024-11-25T21:59:10.725192Z" + "iopub.execute_input": "2024-12-02T19:53:13.383054Z", + "iopub.status.busy": "2024-12-02T19:53:13.382878Z", + "iopub.status.idle": "2024-12-02T19:53:13.384941Z", + "shell.execute_reply": "2024-12-02T19:53:13.384609Z" }, "jukit_cell_id": "8G7ZiPRvd8" }, diff --git a/src/comparators/PearsonsCorrelation.py b/src/comparators/PearsonsCorrelation.py index 4d1bc8d..9697eac 100644 --- a/src/comparators/PearsonsCorrelation.py +++ b/src/comparators/PearsonsCorrelation.py @@ -1,5 +1,5 @@ from collections import defaultdict -from typing import Optional +from typing import Optional, Any import numpy as np import pandas as pd @@ -18,7 +18,7 @@ def __init__(self, _comparison_name: str = "pearsons_correlation"): def _preprocess_data(self): self._group0, self._group1 = self._group0.values, self._group1.values - def save_groups(self, _group_cols: list[str], **_groups: dict[str, pd.DataFrame]): + def save_groups(self, _group_cols: list[str], **_groups: dict[str, tuple[Any, ...]]): """Save column values defining comparison groups""" comparison_count = self._group0.shape[0] * self._group1.shape[0]