From 4b10f4e9311468e26673714892984acf64080e85 Mon Sep 17 00:00:00 2001 From: Mark Keller <7525285+keller-mark@users.noreply.github.com> Date: Mon, 11 Dec 2023 16:09:50 -0500 Subject: [PATCH 1/3] Create mini dataset notebook --- docs/notebooks/create_mini_dataset.ipynb | 674 +++++++++++++++++++++++ 1 file changed, 674 insertions(+) create mode 100644 docs/notebooks/create_mini_dataset.ipynb diff --git a/docs/notebooks/create_mini_dataset.ipynb b/docs/notebooks/create_mini_dataset.ipynb new file mode 100644 index 00000000..07ca8147 --- /dev/null +++ b/docs/notebooks/create_mini_dataset.ipynb @@ -0,0 +1,674 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 192, + "id": "28eae108-fcaa-465d-9fd0-4fe0cffeafe1", + "metadata": {}, + "outputs": [], + "source": [ + "from anndata import AnnData\n", + "from mudata import MuData\n", + "import numpy as np\n", + "import pandas as pd\n", + "from skimage.draw import disk\n", + "from tifffile import imread\n", + "import scanpy as sc\n", + "from vitessce.data_utils import (\n", + " rgb_img_to_ome_tiff,\n", + " multiplex_img_to_ome_tiff,\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "id": "ada13ec1-a7e4-4ee8-aff0-a6df2ce4c118", + "metadata": {}, + "outputs": [], + "source": [ + "img_arr = imread(\"/Users/mkeller/Downloads/exemplar-001.pyramid.ome.tif\")" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "id": "5903f357-0208-4651-bf8d-27e5acb72d6f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(12, 3138, 2509)" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "img_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "id": "3104abce-d31a-4156-9272-4cd070153695", + "metadata": {}, + "outputs": [], + "source": [ + "bitmask_arr = imread(\"/Users/mkeller/Downloads/cellMask.pyramid.ome.tif\")" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 147, + "id": "7af584e4-6a68-4cc8-8ca1-b50ae26d237c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(3138, 2509)" + ] + }, + "execution_count": 147, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bitmask_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "id": "6070ce48-149e-4748-bb73-ef1fc383b4a6", + "metadata": {}, + "outputs": [], + "source": [ + "img_crop_arr = img_arr[0:4, 1000:1250, 1000:1250]" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "id": "566d5acd-d9fb-479e-ab74-3ae0d2e41e2d", + "metadata": {}, + "outputs": [], + "source": [ + "cells_bitmask_crop_arr = bitmask_arr[1000:1250, 1000:1250]" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "id": "267af046-78db-4371-9a1d-de43153b3a50", + "metadata": {}, + "outputs": [], + "source": [ + "multiplex_img_to_ome_tiff(img_crop_arr, [\"DNA\", \"FDX1\", \"CD1D\", \"CD357\"], \"/Users/mkeller/Downloads/exemplar-001.crop.ome.tif\", axes='CYX')" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "id": "44a5e826-0de7-4112-b45e-ec1683591e82", + "metadata": {}, + "outputs": [], + "source": [ + "shapes_bitmask_crop_arr = np.zeros((250, 250))\n", + "radius = 5\n", + "for i in range(16):\n", + " for j in range(16):\n", + " disk_center = (10 + i*15, 10 + j*15)\n", + " shapes_bitmask_crop_arr[disk(disk_center, radius)] = i*16+j+1 # add one (0 is reserved for the background)\n", + "\n", + "# Update the array axes so they are in CYX order to enable conversion to OME-TIFF.\n", + "shapes_bitmask_crop_arr = shapes_bitmask_crop_arr.transpose((1, 0)) # (y, x)" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "id": "e7784e4f-c848-49f5-98ce-82161967f129", + "metadata": {}, + "outputs": [], + "source": [ + "multichannel_bitmask_arr = np.stack((cells_bitmask_crop_arr, shapes_bitmask_crop_arr))" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "id": 
"b378aa50-f97c-4514-82b7-8118d0d2f5b1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2, 250, 250)" + ] + }, + "execution_count": 153, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "multichannel_bitmask_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "id": "37bdc627-c703-4e6a-bbf9-07a94042e6b2", + "metadata": {}, + "outputs": [], + "source": [ + "multiplex_img_to_ome_tiff(multichannel_bitmask_arr, [\"Cells\", \"Circles\"], \"/Users/mkeller/Downloads/exemplar-001.crop.segmentations.ome.tif\", axes='CYX')" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "id": "f2cb43d3-a017-4433-8267-f39052cea395", + "metadata": {}, + "outputs": [], + "source": [ + "cell_ids = np.unique(multichannel_bitmask_arr[0, :, :])[1:]" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "id": "4653bce1-e66f-4a87-873e-736fe9a0727b", + "metadata": {}, + "outputs": [], + "source": [ + "circle_ids = np.unique(multichannel_bitmask_arr[1, :, :])[1:]" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "id": "e3a7b75b-5352-434f-8232-8d2940b35edd", + "metadata": {}, + "outputs": [], + "source": [ + "cell_df = pd.DataFrame(index=cell_ids.astype(int), data=[], columns=[])" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "id": "f7723381-598b-4293-a7ff-7c8e136660e0", + "metadata": {}, + "outputs": [], + "source": [ + "circle_df = pd.DataFrame(index=circle_ids.astype(int), data=[], columns=[\"X\", \"Y\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "id": "0b627f36-3114-43cd-bf18-9aa097ee1144", + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(16):\n", + " for j in range(16):\n", + " c_x = 10 + i*15\n", + " c_y = 10 + j*15\n", + " c_index = int(i*16+j+1)\n", + " circle_df.at[c_index, \"X\"] = c_x\n", + " circle_df.at[c_index, \"Y\"] = c_y" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "id": 
"2e6a931a-1564-4091-b65e-42383648921e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
1006
1007
1011
1012
1018
...
2012
2016
2021
2023
2024
\n", + "

218 rows × 0 columns

\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: [1006, 1007, 1011, 1012, 1018, 1021, 1028, 1029, 1030, 1041, 1056, 1064, 1068, 1069, 1070, 1073, 1074, 1075, 1081, 1085, 1091, 1099, 1103, 1104, 1110, 1111, 1112, 1117, 1122, 1125, 1126, 1131, 1135, 1136, 1140, 1144, 1150, 1159, 1161, 1164, 1171, 1172, 1185, 1193, 1194, 1199, 1200, 1204, 1209, 1210, 1212, 1219, 1220, 1229, 1238, 1239, 1240, 1245, 1251, 1260, 1261, 1264, 1265, 1266, 1267, 1288, 1289, 1293, 1298, 1304, 1305, 1311, 1321, 1322, 1323, 1335, 1337, 1346, 1347, 1350, 1354, 1355, 1361, 1362, 1369, 1378, 1381, 1389, 1390, 1395, 1396, 1402, 1403, 1419, 1422, 1423, 1428, 1439, 1440, 1441, ...]\n", + "\n", + "[218 rows x 0 columns]" + ] + }, + "execution_count": 160, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cell_df" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "id": "2adc9038-88af-4493-a592-4c9f19e976d8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0., 0., 0., ..., 1012., 1012., 1012.],\n", + " [ 0., 0., 0., ..., 1012., 1012., 1012.],\n", + " [ 0., 0., 0., ..., 1012., 1012., 1012.],\n", + " ...,\n", + " [ 0., 0., 0., ..., 0., 0., 0.],\n", + " [ 0., 0., 0., ..., 0., 0., 0.],\n", + " [ 0., 0., 0., ..., 0., 0., 0.]])" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "multichannel_bitmask_arr[0, :, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "id": "ee3a3b6a-bb4d-4542-be61-5ea3595c53f1", + "metadata": {}, + "outputs": [], + "source": [ + "def get_centroid(bitmask_arr, obs_id):\n", + " matching_px = np.argwhere(bitmask_arr == obs_id)\n", + " count = matching_px.shape[0]\n", + " result = matching_px.sum(0) / count\n", + " return (result[0], result[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 184, + "id": "70b77e9b-6715-4bbc-a09a-ba5e9226acc3", + "metadata": {}, + "outputs": [], + "source": [ + "def 
get_channel_value(bitmask_arr, img_arr, obs_id):\n", + " matching_px = (bitmask_arr == obs_id)\n", + " matching_img = img_arr[matching_px]\n", + " return matching_img.sum() / matching_px.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "id": "439d6e24-95c2-4050-8e4b-3eeb6c60a31b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "60204.72093023256" + ] + }, + "execution_count": 185, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_channel_value(multichannel_bitmask_arr[0, :, :], img_crop_arr[0, :, :], 1011)" + ] + }, + { + "cell_type": "code", + "execution_count": 186, + "id": "3d2f4553-181f-4877-a656-247263e97689", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1.55, 191.525)" + ] + }, + "execution_count": 186, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_centroid(multichannel_bitmask_arr[0, :, :], 1006)" + ] + }, + { + "cell_type": "code", + "execution_count": 187, + "id": "401454f4-e250-405d-bb15-bc6bc3104018", + "metadata": {}, + "outputs": [], + "source": [ + "var_df = pd.DataFrame(index=[\"DNA\", \"FDX1\", \"CD1D\", \"CD357\"], data=[], columns=[])" + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "id": "12fa9835-5ba8-49a9-b245-555ea03c8223", + "metadata": {}, + "outputs": [], + "source": [ + "cell_df[\"X\"] = [get_centroid(multichannel_bitmask_arr[0, :, :], int(i))[0] for i in cell_df.index]\n", + "cell_df[\"Y\"] = [get_centroid(multichannel_bitmask_arr[0, :, :], int(i))[1] for i in cell_df.index]" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "id": "7ecae4d3-3eea-4594-b8d1-1c23578a70b1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
XY
10061.550000191.525000
10071.930556212.305556
10113.23255848.581395
10121.363636248.090909
10180.578947156.631579
.........
2012247.225806194.161290
2016248.214286217.785714
2021248.857143161.285714
2023249.00000066.000000
2024249.000000205.000000
\n", + "

218 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " X Y\n", + "1006 1.550000 191.525000\n", + "1007 1.930556 212.305556\n", + "1011 3.232558 48.581395\n", + "1012 1.363636 248.090909\n", + "1018 0.578947 156.631579\n", + "... ... ...\n", + "2012 247.225806 194.161290\n", + "2016 248.214286 217.785714\n", + "2021 248.857143 161.285714\n", + "2023 249.000000 66.000000\n", + "2024 249.000000 205.000000\n", + "\n", + "[218 rows x 2 columns]" + ] + }, + "execution_count": 166, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cell_df" + ] + }, + { + "cell_type": "code", + "execution_count": 189, + "id": "25d7ba6e-f452-4033-af74-7400a356bc7f", + "metadata": {}, + "outputs": [], + "source": [ + "cell_X = np.zeros((cell_df.shape[0], var_df.shape[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 190, + "id": "3a7e9751-562f-4bc7-854e-bb382ffff24c", + "metadata": {}, + "outputs": [], + "source": [ + "for c_index in range(4):\n", + " for i, cell_id in enumerate(cell_df.index):\n", + " cell_X[i, c_index] = get_channel_value(multichannel_bitmask_arr[0, :, :], img_crop_arr[c_index, :, :], cell_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 193, + "id": "b8b21ea2-33d4-4756-a890-9b67fb40824a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mkeller/software/miniconda3/envs/vitessce-python-notebooks/lib/python3.9/site-packages/anndata/_core/anndata.py:183: ImplicitModificationWarning: Transforming to str index.\n", + " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n" + ] + } + ], + "source": [ + "adata = AnnData(X=cell_X, obs=cell_df, var=var_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 194, + "id": "a1ed5534-c301-4af9-9d28-df23483af4ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AnnData object with n_obs × n_vars = 218 × 4\n", + " obs: 'X', 'Y'" + ] + }, + "execution_count": 194, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "adata" + ] + }, + { + "cell_type": "code", + "execution_count": 201, + "id": "a3950efb-38e9-4f78-802b-624d5fbbec66", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mkeller/software/miniconda3/envs/vitessce-python-notebooks/lib/python3.9/site-packages/scanpy/plotting/_anndata.py:1068: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if isinstance(groupby, str) and is_categorical_dtype(adata.obs[groupby]):\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ax = sc.pl.heatmap(adata, var_names = var_df.index.tolist(), groupby=\"X\", standard_scale=\"var\", swap_axes=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abb4c395-3102-4205-8cbf-e7796f6b866b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 48467727d57cb74705f7cd0592f5125707411b06 Mon Sep 17 00:00:00 2001 From: Mark Keller <7525285+keller-mark@users.noreply.github.com> Date: Wed, 17 Jan 2024 19:32:51 -0500 Subject: [PATCH 2/3] Update --- docs/notebooks/create_mini_dataset.ipynb | 396 ++++++++++++++++++++--- pyproject.toml | 2 +- 2 files changed, 355 insertions(+), 43 deletions(-) diff --git a/docs/notebooks/create_mini_dataset.ipynb b/docs/notebooks/create_mini_dataset.ipynb index 07ca8147..bbe25aa7 100644 --- a/docs/notebooks/create_mini_dataset.ipynb +++ b/docs/notebooks/create_mini_dataset.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 192, + "execution_count": 1, "id": "28eae108-fcaa-465d-9fd0-4fe0cffeafe1", "metadata": {}, "outputs": [], @@ -17,6 +17,8 @@ "from vitessce.data_utils import (\n", " rgb_img_to_ome_tiff,\n", " multiplex_img_to_ome_tiff,\n", + " rgb_img_to_ome_zarr,\n", + " multiplex_img_to_ome_zarr,\n", " optimize_adata,\n", " VAR_CHUNK_SIZE,\n", ")" @@ -24,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": 2, "id": "ada13ec1-a7e4-4ee8-aff0-a6df2ce4c118", "metadata": {}, "outputs": [], @@ -34,7 +36,7 @@ }, { "cell_type": "code", - 
"execution_count": 145, + "execution_count": 3, "id": "5903f357-0208-4651-bf8d-27e5acb72d6f", "metadata": {}, "outputs": [ @@ -44,7 +46,7 @@ "(12, 3138, 2509)" ] }, - "execution_count": 145, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -55,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 146, + "execution_count": 4, "id": "3104abce-d31a-4156-9272-4cd070153695", "metadata": {}, "outputs": [], @@ -65,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 147, + "execution_count": 5, "id": "7af584e4-6a68-4cc8-8ca1-b50ae26d237c", "metadata": {}, "outputs": [ @@ -75,7 +77,7 @@ "(3138, 2509)" ] }, - "execution_count": 147, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -86,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 148, + "execution_count": 6, "id": "6070ce48-149e-4748-bb73-ef1fc383b4a6", "metadata": {}, "outputs": [], @@ -96,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 7, "id": "566d5acd-d9fb-479e-ab74-3ae0d2e41e2d", "metadata": {}, "outputs": [], @@ -106,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": 150, + "execution_count": 8, "id": "267af046-78db-4371-9a1d-de43153b3a50", "metadata": {}, "outputs": [], @@ -116,7 +118,17 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 9, + "id": "2121fbae-6e05-4a22-826a-093a35bda15e", + "metadata": {}, + "outputs": [], + "source": [ + "multiplex_img_to_ome_zarr(img_crop_arr, [\"DNA\", \"FDX1\", \"CD1D\", \"CD357\"], \"/Users/mkeller/Downloads/exemplar-001.crop.image.ome.zarr\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "id": "44a5e826-0de7-4112-b45e-ec1683591e82", "metadata": {}, "outputs": [], @@ -134,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": 11, "id": "e7784e4f-c848-49f5-98ce-82161967f129", "metadata": {}, "outputs": [], @@ -144,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 153, + 
"execution_count": 12, "id": "b378aa50-f97c-4514-82b7-8118d0d2f5b1", "metadata": {}, "outputs": [ @@ -154,7 +166,7 @@ "(2, 250, 250)" ] }, - "execution_count": 153, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -165,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 13, "id": "37bdc627-c703-4e6a-bbf9-07a94042e6b2", "metadata": {}, "outputs": [], @@ -175,7 +187,17 @@ }, { "cell_type": "code", - "execution_count": 155, + "execution_count": 39, + "id": "1500fbd0-9ace-4f12-826b-753458f8c4cc", + "metadata": {}, + "outputs": [], + "source": [ + "multiplex_img_to_ome_zarr(multichannel_bitmask_arr, [\"cell\", \"circle\"], \"/Users/mkeller/Downloads/exemplar-001.crop.segmentations.ome.zarr\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, "id": "f2cb43d3-a017-4433-8267-f39052cea395", "metadata": {}, "outputs": [], @@ -185,7 +207,7 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 16, "id": "4653bce1-e66f-4a87-873e-736fe9a0727b", "metadata": {}, "outputs": [], @@ -195,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 17, "id": "e3a7b75b-5352-434f-8232-8d2940b35edd", "metadata": {}, "outputs": [], @@ -205,7 +227,7 @@ }, { "cell_type": "code", - "execution_count": 158, + "execution_count": 18, "id": "f7723381-598b-4293-a7ff-7c8e136660e0", "metadata": {}, "outputs": [], @@ -215,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": 19, "id": "0b627f36-3114-43cd-bf18-9aa097ee1144", "metadata": {}, "outputs": [], @@ -231,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 160, + "execution_count": 20, "id": "2e6a931a-1564-4091-b65e-42383648921e", "metadata": {}, "outputs": [ @@ -305,7 +327,7 @@ "[218 rows x 0 columns]" ] }, - "execution_count": 160, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -316,7 +338,7 @@ }, { "cell_type": "code", - "execution_count": 161, + 
"execution_count": 21, "id": "2adc9038-88af-4493-a592-4c9f19e976d8", "metadata": {}, "outputs": [ @@ -332,7 +354,7 @@ " [ 0., 0., 0., ..., 0., 0., 0.]])" ] }, - "execution_count": 161, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -343,7 +365,7 @@ }, { "cell_type": "code", - "execution_count": 183, + "execution_count": 22, "id": "ee3a3b6a-bb4d-4542-be61-5ea3595c53f1", "metadata": {}, "outputs": [], @@ -357,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 184, + "execution_count": 23, "id": "70b77e9b-6715-4bbc-a09a-ba5e9226acc3", "metadata": {}, "outputs": [], @@ -370,7 +392,7 @@ }, { "cell_type": "code", - "execution_count": 185, + "execution_count": 24, "id": "439d6e24-95c2-4050-8e4b-3eeb6c60a31b", "metadata": {}, "outputs": [ @@ -380,7 +402,7 @@ "60204.72093023256" ] }, - "execution_count": 185, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -391,7 +413,7 @@ }, { "cell_type": "code", - "execution_count": 186, + "execution_count": 25, "id": "3d2f4553-181f-4877-a656-247263e97689", "metadata": {}, "outputs": [ @@ -401,7 +423,7 @@ "(1.55, 191.525)" ] }, - "execution_count": 186, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -412,7 +434,7 @@ }, { "cell_type": "code", - "execution_count": 187, + "execution_count": 26, "id": "401454f4-e250-405d-bb15-bc6bc3104018", "metadata": {}, "outputs": [], @@ -422,7 +444,7 @@ }, { "cell_type": "code", - "execution_count": 165, + "execution_count": 27, "id": "12fa9835-5ba8-49a9-b245-555ea03c8223", "metadata": {}, "outputs": [], @@ -433,7 +455,7 @@ }, { "cell_type": "code", - "execution_count": 166, + "execution_count": 28, "id": "7ecae4d3-3eea-4594-b8d1-1c23578a70b1", "metadata": {}, "outputs": [ @@ -540,7 +562,7 @@ "[218 rows x 2 columns]" ] }, - "execution_count": 166, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -551,7 +573,7 @@ }, { "cell_type": "code", - "execution_count": 189, + 
"execution_count": 29, "id": "25d7ba6e-f452-4033-af74-7400a356bc7f", "metadata": {}, "outputs": [], @@ -561,7 +583,7 @@ }, { "cell_type": "code", - "execution_count": 190, + "execution_count": 30, "id": "3a7e9751-562f-4bc7-854e-bb382ffff24c", "metadata": {}, "outputs": [], @@ -573,7 +595,7 @@ }, { "cell_type": "code", - "execution_count": 193, + "execution_count": 31, "id": "b8b21ea2-33d4-4756-a890-9b67fb40824a", "metadata": {}, "outputs": [ @@ -581,7 +603,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/mkeller/software/miniconda3/envs/vitessce-python-notebooks/lib/python3.9/site-packages/anndata/_core/anndata.py:183: ImplicitModificationWarning: Transforming to str index.\n", + "/var/folders/36/83j6x3ln225bvbpk1_vdzrm00000gn/T/ipykernel_15643/3333765654.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n", + " adata = AnnData(X=cell_X, obs=cell_df, var=var_df)\n", + "/Users/mkeller/software/miniconda3/envs/vitessce-python-notebooks/lib/python3.9/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.\n", " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n" ] } @@ -592,7 +616,7 @@ }, { "cell_type": "code", - "execution_count": 194, + "execution_count": 32, "id": "a1ed5534-c301-4af9-9d28-df23483af4ab", "metadata": {}, "outputs": [ @@ -603,7 +627,7 @@ " obs: 'X', 'Y'" ] }, - "execution_count": 194, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -614,7 +638,7 @@ }, { "cell_type": "code", - "execution_count": 201, + "execution_count": 33, "id": "a3950efb-38e9-4f78-802b-624d5fbbec66", "metadata": {}, "outputs": [ @@ -643,10 +667,298 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": 
"abb4c395-3102-4205-8cbf-e7796f6b866b", "metadata": {}, "outputs": [], + "source": [ + "adata.write_zarr(\"/Users/mkeller/Downloads/exemplar-001.crop.cells.adata.zarr\")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "9c97045b-85a2-4ff8-bd62-4d777bbf4fd7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
XY
11010
21025
31040
41055
51070
.........
252235175
253235190
254235205
255235220
256235235
\n", + "

256 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " X Y\n", + "1 10 10\n", + "2 10 25\n", + "3 10 40\n", + "4 10 55\n", + "5 10 70\n", + ".. ... ...\n", + "252 235 175\n", + "253 235 190\n", + "254 235 205\n", + "255 235 220\n", + "256 235 235\n", + "\n", + "[256 rows x 2 columns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "circle_df" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "a0923acc-a263-48ac-8965-f77a536dce2a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mkeller/software/miniconda3/envs/vitessce-python-notebooks/lib/python3.9/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.\n", + " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n", + "/Users/mkeller/software/miniconda3/envs/vitessce-python-notebooks/lib/python3.9/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.\n", + " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n" + ] + } + ], + "source": [ + "circle_adata = AnnData(X=None, obs=circle_df, var=pd.DataFrame(data=[]))" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "887679dc-a9a8-4a7e-8da2-a527634e5d01", + "metadata": {}, + "outputs": [], + "source": [ + "circle_adata.obs['X'] = circle_adata.obs['X'].astype(int)\n", + "circle_adata.obs['Y'] = circle_adata.obs['Y'].astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "be19ef76-27c3-42a8-89ca-326b1d835f66", + "metadata": {}, + "outputs": [], + "source": [ + "circle_adata.write_zarr(\"/Users/mkeller/Downloads/exemplar-001.crop.circles.adata.zarr\")" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "d238ead4-890d-45ed-813c-f4a770ce01ec", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
XY
10061.550000191.525000
10071.930556212.305556
10113.23255848.581395
10121.363636248.090909
10180.578947156.631579
.........
2012247.225806194.161290
2016248.214286217.785714
2021248.857143161.285714
2023249.00000066.000000
2024249.000000205.000000
\n", + "

218 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " X Y\n", + "1006 1.550000 191.525000\n", + "1007 1.930556 212.305556\n", + "1011 3.232558 48.581395\n", + "1012 1.363636 248.090909\n", + "1018 0.578947 156.631579\n", + "... ... ...\n", + "2012 247.225806 194.161290\n", + "2016 248.214286 217.785714\n", + "2021 248.857143 161.285714\n", + "2023 249.000000 66.000000\n", + "2024 249.000000 205.000000\n", + "\n", + "[218 rows x 2 columns]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adata.obs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16eca51a-2c94-4091-9eae-9adf52bcd081", + "metadata": {}, + "outputs": [], "source": [] } ], diff --git a/pyproject.toml b/pyproject.toml index d9e9406c..ab4103cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ 'numpy>=1.21.2', 'anndata>=0.7.8,<0.9', 'scanpy>=1.9.3', - 'ome-zarr==0.2.1', + 'ome-zarr==0.8.3', 'tifffile>=2020.10.1', 'jsonschema>=3.2' ] From 6a2b45a0ec7ac8927e3b0ed4842c7bb7fb512de3 Mon Sep 17 00:00:00 2001 From: Mark Keller <7525285+keller-mark@users.noreply.github.com> Date: Mon, 22 Jan 2024 13:21:25 -0500 Subject: [PATCH 3/3] Mini dataset notebook --- docs/notebooks/create_mini_dataset.ipynb | 324 ++++++++++++++--------- 1 file changed, 204 insertions(+), 120 deletions(-) diff --git a/docs/notebooks/create_mini_dataset.ipynb b/docs/notebooks/create_mini_dataset.ipynb index bbe25aa7..6b45dc46 100644 --- a/docs/notebooks/create_mini_dataset.ipynb +++ b/docs/notebooks/create_mini_dataset.ipynb @@ -187,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 14, "id": "1500fbd0-9ace-4f12-826b-753458f8c4cc", "metadata": {}, "outputs": [], @@ -603,7 +603,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/var/folders/36/83j6x3ln225bvbpk1_vdzrm00000gn/T/ipykernel_15643/3333765654.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. 
In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n", + "/var/folders/36/83j6x3ln225bvbpk1_vdzrm00000gn/T/ipykernel_75334/3333765654.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n", " adata = AnnData(X=cell_X, obs=cell_df, var=var_df)\n", "/Users/mkeller/software/miniconda3/envs/vitessce-python-notebooks/lib/python3.9/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.\n", " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n" @@ -636,6 +636,208 @@ "adata" ] }, + { + "cell_type": "code", + "execution_count": 46, + "id": "37f46887-2f03-4dee-a5e1-9607ee8da2b8", + "metadata": {}, + "outputs": [], + "source": [ + "cell_types = [\n", + " [\"Astrocyte\", \"Astrocyte Gfap\"],\n", + " [\"Astrocyte\", \"Astrocyte Mfge8\"],\n", + " [\"Brain immune\", \"Microglia\"],\n", + " [\"Brain immune\", \"Perivascular Macrophage\"],\n", + " [\"Excitatory neurons\", \"Hippocampus\"]\n", + "]\n", + "leiden_clusters = [1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "\n", + "adata.obs[\"cell_type_i\"] = [ i % len(cell_types) for i in range(adata.obs.shape[0])]\n", + "adata.obs[\"cell_type_coarse\"] = adata.obs[\"cell_type_i\"].apply(lambda i: cell_types[i][0])\n", + "adata.obs[\"cell_type_fine\"] = adata.obs[\"cell_type_i\"].apply(lambda i: cell_types[i][1])\n", + "adata.obs[\"leiden_cluster\"] = [ leiden_clusters[i % len(leiden_clusters)] for i in range(adata.obs.shape[0])]\n", + "adata.obs[\"leiden_cluster_str\"] = [ f\"Cluster {leiden_clusters[i % len(leiden_clusters)]}\" for i in range(adata.obs.shape[0])]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": 
"da20e8cc-8396-48a6-938e-a643f7680a3b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
XYcell_type_icell_type_coarsecell_type_fineleiden_cluster
10061.550000191.5250000AstrocyteAstrocyte Gfap1
10071.930556212.3055561AstrocyteAstrocyte Mfge82
10113.23255848.5813952Brain immuneMicroglia3
10121.363636248.0909093Brain immunePerivascular Macrophage4
10180.578947156.6315794Excitatory neuronsHippocampus5
.....................
2012247.225806194.1612903Brain immunePerivascular Macrophage7
2016248.214286217.7857144Excitatory neuronsHippocampus8
2021248.857143161.2857140AstrocyteAstrocyte Gfap9
2023249.00000066.0000001AstrocyteAstrocyte Mfge81
2024249.000000205.0000002Brain immuneMicroglia2
\n", + "

218 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " X Y cell_type_i cell_type_coarse \\\n", + "1006 1.550000 191.525000 0 Astrocyte \n", + "1007 1.930556 212.305556 1 Astrocyte \n", + "1011 3.232558 48.581395 2 Brain immune \n", + "1012 1.363636 248.090909 3 Brain immune \n", + "1018 0.578947 156.631579 4 Excitatory neurons \n", + "... ... ... ... ... \n", + "2012 247.225806 194.161290 3 Brain immune \n", + "2016 248.214286 217.785714 4 Excitatory neurons \n", + "2021 248.857143 161.285714 0 Astrocyte \n", + "2023 249.000000 66.000000 1 Astrocyte \n", + "2024 249.000000 205.000000 2 Brain immune \n", + "\n", + " cell_type_fine leiden_cluster \n", + "1006 Astrocyte Gfap 1 \n", + "1007 Astrocyte Mfge8 2 \n", + "1011 Microglia 3 \n", + "1012 Perivascular Macrophage 4 \n", + "1018 Hippocampus 5 \n", + "... ... ... \n", + "2012 Perivascular Macrophage 7 \n", + "2016 Hippocampus 8 \n", + "2021 Astrocyte Gfap 9 \n", + "2023 Astrocyte Mfge8 1 \n", + "2024 Microglia 2 \n", + "\n", + "[218 rows x 6 columns]" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adata.obs" + ] + }, { "cell_type": "code", "execution_count": 33, @@ -835,124 +1037,6 @@ "circle_adata.write_zarr(\"/Users/mkeller/Downloads/exemplar-001.crop.circles.adata.zarr\")" ] }, - { - "cell_type": "code", - "execution_count": 40, - "id": "d238ead4-890d-45ed-813c-f4a770ce01ec", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
XY
10061.550000191.525000
10071.930556212.305556
10113.23255848.581395
10121.363636248.090909
10180.578947156.631579
.........
2012247.225806194.161290
2016248.214286217.785714
2021248.857143161.285714
2023249.00000066.000000
2024249.000000205.000000
\n", - "

218 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " X Y\n", - "1006 1.550000 191.525000\n", - "1007 1.930556 212.305556\n", - "1011 3.232558 48.581395\n", - "1012 1.363636 248.090909\n", - "1018 0.578947 156.631579\n", - "... ... ...\n", - "2012 247.225806 194.161290\n", - "2016 248.214286 217.785714\n", - "2021 248.857143 161.285714\n", - "2023 249.000000 66.000000\n", - "2024 249.000000 205.000000\n", - "\n", - "[218 rows x 2 columns]" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "adata.obs" - ] - }, { "cell_type": "code", "execution_count": null,