Skip to content

Update tutorial for Xenium pooling #81

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Dec 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/hest/HESTData.py
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,7 @@ def iter_hest(hest_dir: str, id_list: List[str] = None, **read_kwargs) -> HESTIt
Args:
hest_dir (str): hest directory containing folders: st, wsis, metadata, tissue_seg (optional)
id_list (List[str], Optional): list of ids to read (ex: ['TENX96', 'TENX99']), pass None to read all available samples. Default to None
load_transcripts (bool, Optional): Whether to load the full transcript dataframe for Xenium samples. Default to False

Returns:
HESTIterator: HESTData iterator
Expand Down
100 changes: 14 additions & 86 deletions tutorials/2-Interacting-with-HEST-1k.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,67 +25,14 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\HEST\\HEST\\lib\\site-packages\\hestcore\\wsi.py:27: UserWarning: CuImage is not available. Ensure you have a GPU and cucim installed to use GPU acceleration.\n",
" warnings.warn(\"CuImage is not available. Ensure you have a GPU and cucim installed to use GPU acceleration.\")\n",
"d:\\HEST\\HEST\\lib\\site-packages\\scanpy\\preprocessing\\_qc.py:432: RuntimeWarning: invalid value encountered in divide\n",
" return values / sums[:, None]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"* Scanpy adata:\n",
"AnnData object with n_obs × n_vars = 11845 × 541\n",
" obs: 'in_tissue', 'pxl_col_in_fullres', 'pxl_row_in_fullres', 'array_col', 'array_row', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito'\n",
" var: 'mito', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'\n",
" uns: 'spatial'\n",
" obsm: 'spatial'\n",
"\n",
"* WSI:\n",
"<width=48376, height=53738, backend=OpenSlideWSI>\n",
"\n",
"* Shapes:\n",
"[name: cellvit, coord-system: he, <not loaded>, name: xenium_cell, coord-system: he, <not loaded>, name: xenium_nucleus, coord-system: he, <not loaded>]\n",
"\n",
"* Tissue contours:\n",
" tissue_id geometry\n",
"0 0 POLYGON ((14052 2848, 14025 2874, 13998 2874, ...\n",
"\n",
"* SpatialData conversion:\n",
"SpatialData object\n",
"├── Images\n",
"│ ├── 'ST_downscaled_hires_image': SpatialImage[cyx] (3, 3358, 3023)\n",
"│ └── 'ST_downscaled_lowres_image': SpatialImage[cyx] (3, 1000, 900)\n",
"├── Shapes\n",
"│ ├── 'cellvit': GeoDataFrame shape: (497508, 3) (2D shapes)\n",
"│ ├── 'locations': GeoDataFrame shape: (11845, 2) (2D shapes)\n",
"│ ├── 'tissue_contours': GeoDataFrame shape: (1, 2) (2D shapes)\n",
"│ ├── 'xenium_cell': GeoDataFrame shape: (574852, 1) (2D shapes)\n",
"│ └── 'xenium_nucleus': GeoDataFrame shape: (574852, 1) (2D shapes)\n",
"└── Tables\n",
" └── 'table': AnnData (11845, 541)\n",
"with coordinate systems:\n",
" ▸ 'ST_downscaled_hires', with elements:\n",
" ST_downscaled_hires_image (Images), cellvit (Shapes), locations (Shapes), tissue_contours (Shapes), xenium_cell (Shapes), xenium_nucleus (Shapes)\n",
" ▸ 'ST_downscaled_lowres', with elements:\n",
" ST_downscaled_lowres_image (Images), cellvit (Shapes), locations (Shapes), tissue_contours (Shapes), xenium_cell (Shapes), xenium_nucleus (Shapes)\n"
]
}
],
"outputs": [],
"source": [
"from hest import iter_hest\n",
"\n",
"# Iterate through a subset of hest\n",
"for st in iter_hest('../hest_data', id_list=['TENX95']):\n",
"for st in iter_hest('../hest_data', id_list=['TENX105']):\n",
"\n",
" # ST (adata):\n",
" adata = st.adata\n",
Expand Down Expand Up @@ -235,11 +182,13 @@
"source": [
"# directory where the patch .h5 will be saved\n",
"patch_save_dir = './processed'\n",
"new_patch_size = 224\n",
"\n",
"\n",
"st.dump_patches(\n",
" patch_save_dir,\n",
" name='demo',\n",
" target_patch_size=224, # target patch size in 224\n",
"    target_patch_size=new_patch_size, # target patch size in pixels\n",
" target_pixel_size=0.5 # pixel size of the patches in um/px after rescaling\n",
")"
]
Expand All @@ -255,44 +204,23 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\HEST\\HEST\\lib\\site-packages\\hestcore\\wsi.py:27: UserWarning: CuImage is not available. Ensure you have a GPU and cucim installed to use GPU acceleration.\n",
" warnings.warn(\"CuImage is not available. Ensure you have a GPU and cucim installed to use GPU acceleration.\")\n"
]
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
"\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"outputs": [],
"source": [
"from hest.readers import pool_transcripts_xenium\n",
"from hest import iter_hest\n",
"\n",
"new_spot_size = 200\n",
"\n",
"new_patch_size = 224\n",
"patch_save_dir = './processed'\n",
"\n",
"# Iterate through a subset of hest\n",
"for st in iter_hest('../hest_data', id_list=['TENX95'], load_transcripts=True,):\n",
"for st in iter_hest('../hest_data', id_list=['TENX105'], load_transcripts=True,):\n",
" print(st.transcript_df)\n",
"\n",
" # Feel free to convert st.transcript_df to a Dask DataFrame if you are working with limited RAM.\n",
"\n",
" st.adata = pool_transcripts_xenium(\n",
" st.transcript_df, \n",
" st.transcript_df, # Feel free to convert st.transcript_df to a Dask DataFrame if you are working with limited RAM.\n",
" st.pixel_size, \n",
" key_x='he_x',\n",
" key_y='he_y',\n",
Expand All @@ -304,7 +232,7 @@
" st.dump_patches(\n",
" patch_save_dir,\n",
" name='demo',\n",
" target_patch_size=224, # target patch size in 224\n",
"        target_patch_size=new_patch_size, # target patch size in pixels\n",
" target_pixel_size=0.5 # pixel size of the patches in um/px after rescaling\n",
" )"
]
Expand Down Expand Up @@ -370,7 +298,7 @@
"from hest import iter_hest\n",
"\n",
"# Iterate through a subset of hest\n",
"for st in iter_hest('../hest_data', id_list=['TENX95'], load_transcripts=True):\n",
"for st in iter_hest('../hest_data', id_list=['TENX105'], load_transcripts=True):\n",
" print(st.transcript_df)"
]
},
Expand Down
Loading