Skip to content

Update tutorial for Xenium pooling #81

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Dec 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/hest/HESTData.py
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,7 @@ def iter_hest(hest_dir: str, id_list: List[str] = None, **read_kwargs) -> HESTIt
Args:
hest_dir (str): hest directory containing folders: st, wsis, metadata, tissue_seg (optional)
id_list (List[str], Optional): list of ids to read (ex: ['TENX96', 'TENX99']), pass None to read all available samples. Default to None
load_transcripts (bool, Optional): Whether to load the full transcript dataframe for Xenium samples. Default to False

Returns:
HESTIterator: HESTData iterator
Expand Down
100 changes: 14 additions & 86 deletions tutorials/2-Interacting-with-HEST-1k.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,67 +25,14 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\HEST\\HEST\\lib\\site-packages\\hestcore\\wsi.py:27: UserWarning: CuImage is not available. Ensure you have a GPU and cucim installed to use GPU acceleration.\n",
" warnings.warn(\"CuImage is not available. Ensure you have a GPU and cucim installed to use GPU acceleration.\")\n",
"d:\\HEST\\HEST\\lib\\site-packages\\scanpy\\preprocessing\\_qc.py:432: RuntimeWarning: invalid value encountered in divide\n",
" return values / sums[:, None]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"* Scanpy adata:\n",
"AnnData object with n_obs × n_vars = 11845 × 541\n",
" obs: 'in_tissue', 'pxl_col_in_fullres', 'pxl_row_in_fullres', 'array_col', 'array_row', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito'\n",
" var: 'mito', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'\n",
" uns: 'spatial'\n",
" obsm: 'spatial'\n",
"\n",
"* WSI:\n",
"<width=48376, height=53738, backend=OpenSlideWSI>\n",
"\n",
"* Shapes:\n",
"[name: cellvit, coord-system: he, <not loaded>, name: xenium_cell, coord-system: he, <not loaded>, name: xenium_nucleus, coord-system: he, <not loaded>]\n",
"\n",
"* Tissue contours:\n",
" tissue_id geometry\n",
"0 0 POLYGON ((14052 2848, 14025 2874, 13998 2874, ...\n",
"\n",
"* SpatialData conversion:\n",
"SpatialData object\n",
"├── Images\n",
"│ ├── 'ST_downscaled_hires_image': SpatialImage[cyx] (3, 3358, 3023)\n",
"│ └── 'ST_downscaled_lowres_image': SpatialImage[cyx] (3, 1000, 900)\n",
"├── Shapes\n",
"│ ├── 'cellvit': GeoDataFrame shape: (497508, 3) (2D shapes)\n",
"│ ├── 'locations': GeoDataFrame shape: (11845, 2) (2D shapes)\n",
"│ ├── 'tissue_contours': GeoDataFrame shape: (1, 2) (2D shapes)\n",
"│ ├── 'xenium_cell': GeoDataFrame shape: (574852, 1) (2D shapes)\n",
"│ └── 'xenium_nucleus': GeoDataFrame shape: (574852, 1) (2D shapes)\n",
"└── Tables\n",
" └── 'table': AnnData (11845, 541)\n",
"with coordinate systems:\n",
" ▸ 'ST_downscaled_hires', with elements:\n",
" ST_downscaled_hires_image (Images), cellvit (Shapes), locations (Shapes), tissue_contours (Shapes), xenium_cell (Shapes), xenium_nucleus (Shapes)\n",
" ▸ 'ST_downscaled_lowres', with elements:\n",
" ST_downscaled_lowres_image (Images), cellvit (Shapes), locations (Shapes), tissue_contours (Shapes), xenium_cell (Shapes), xenium_nucleus (Shapes)\n"
]
}
],
"outputs": [],
"source": [
"from hest import iter_hest\n",
"\n",
"# Iterate through a subset of hest\n",
"for st in iter_hest('../hest_data', id_list=['TENX95']):\n",
"for st in iter_hest('../hest_data', id_list=['TENX105']):\n",
"\n",
" # ST (adata):\n",
" adata = st.adata\n",
Expand Down Expand Up @@ -235,11 +182,13 @@
"source": [
"# directory where the patch .h5 will be saved\n",
"patch_save_dir = './processed'\n",
"new_patch_size = 224\n",
"\n",
"\n",
"st.dump_patches(\n",
" patch_save_dir,\n",
" name='demo',\n",
" target_patch_size=224, # target patch size in 224\n",
"    target_patch_size=new_patch_size, # target patch size in pixels\n",
" target_pixel_size=0.5 # pixel size of the patches in um/px after rescaling\n",
")"
]
Expand All @@ -255,44 +204,23 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\HEST\\HEST\\lib\\site-packages\\hestcore\\wsi.py:27: UserWarning: CuImage is not available. Ensure you have a GPU and cucim installed to use GPU acceleration.\n",
" warnings.warn(\"CuImage is not available. Ensure you have a GPU and cucim installed to use GPU acceleration.\")\n"
]
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
"\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"outputs": [],
"source": [
"from hest.readers import pool_transcripts_xenium\n",
"from hest import iter_hest\n",
"\n",
"new_spot_size = 200\n",
"\n",
"new_patch_size = 224\n",
"patch_save_dir = './processed'\n",
"\n",
"# Iterate through a subset of hest\n",
"for st in iter_hest('../hest_data', id_list=['TENX95'], load_transcripts=True,):\n",
"for st in iter_hest('../hest_data', id_list=['TENX105'], load_transcripts=True,):\n",
" print(st.transcript_df)\n",
"\n",
" # Feel free to convert st.transcript_df to a Dask DataFrame if you are working with limited RAM.\n",
"\n",
" st.adata = pool_transcripts_xenium(\n",
" st.transcript_df, \n",
" st.transcript_df, # Feel free to convert st.transcript_df to a Dask DataFrame if you are working with limited RAM.\n",
" st.pixel_size, \n",
" key_x='he_x',\n",
" key_y='he_y',\n",
Expand All @@ -304,7 +232,7 @@
" st.dump_patches(\n",
" patch_save_dir,\n",
" name='demo',\n",
" target_patch_size=224, # target patch size in 224\n",
"        target_patch_size=new_patch_size, # target patch size in pixels\n",
" target_pixel_size=0.5 # pixel size of the patches in um/px after rescaling\n",
" )"
]
Expand Down Expand Up @@ -370,7 +298,7 @@
"from hest import iter_hest\n",
"\n",
"# Iterate through a subset of hest\n",
"for st in iter_hest('../hest_data', id_list=['TENX95'], load_transcripts=True):\n",
"for st in iter_hest('../hest_data', id_list=['TENX105'], load_transcripts=True):\n",
" print(st.transcript_df)"
]
},
Expand Down
Loading