Skip to content

Commit

Permalink
HESTData: provide util to map Ensembl ID to gene name (#71)
Browse files Browse the repository at this point in the history
* HESTData: provide util to map Ensembl ID to gene name

* HESTData: filter_na=False as default for Ensembl ID mapping

* add unit tests and inplace arg

---------

Co-authored-by: Paul Doucet <homedoucetpaul@gmail.com>
  • Loading branch information
konst-int-i and pauldoucet authored Nov 16, 2024
1 parent c73e610 commit 2418cec
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 4 deletions.
45 changes: 43 additions & 2 deletions src/hest/HESTData.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,10 @@ def read_hest_wsi(wsi: WSI, width, height):

return SpatialData(tables=new_table, images=images, shapes=shapes)

def ensembleID_to_gene(self, filter_na=False):
    """Annotate the genes of this object with Biomart gene names, in place.

    Thin wrapper that calls the module-level ``ensembleID_to_gene`` function
    on ``self`` with ``inplace=True``. Nothing is returned; the object itself
    is modified.

    Args:
        filter_na (bool): forwarded unchanged to the module-level function.
            Defaults to False.
    """
    # Inside a method body this name resolves to the module-level function,
    # not to this method, so the call below is not recursive.
    ensembleID_to_gene(self, inplace=True, filter_na=filter_na)


class VisiumHESTData(HESTData):
def __init__(self,
adata: sc.AnnData, # type: ignore
Expand Down Expand Up @@ -1239,11 +1243,48 @@ def unify_gene_names(adata: sc.AnnData, species="human", drop=False) -> sc.AnnDa

if drop:
adata = adata[:, ~remaining]

# TODO return dict map of renamed, and remaining

return adata

def ensembleID_to_gene(st: HESTData, inplace=False, filter_na=False) -> HESTData:
    """Annotate the genes of a HESTData object with Biomart gene names.

    The variable names of ``st.adata`` are looked up as Ensembl gene IDs in
    the Biomart annotations fetched through scanpy (``use_cache=True``), and
    the matching gene names are stored in ``st.adata.var['gene_name']``.

    Args:
        st (HESTData): HESTData object whose ``adata.var_names`` are Ensembl gene IDs.
        inplace (bool): whether to perform the changes in place. When False,
            the work is done on ``st.copy()``. Defaults to False.
        filter_na (bool): whether to filter genes that are not valid Ensembl IDs.
            When True, genes without a match are dropped and ``adata.var_names``
            is replaced by the gene names. When False, every gene is kept,
            ``adata.var_names`` is left untouched and ``gene_name`` falls back
            to the original identifier for unmatched genes. Defaults to False.

    Returns:
        HESTData: the annotated object (``st`` itself when ``inplace`` is True,
        otherwise the copy).
    """
    import scanpy as sc

    if not inplace:
        st = st.copy()

    # Every species other than "Homo sapiens" is looked up in the mouse dataset.
    org = "hsapiens" if st.meta['species'] == "Homo sapiens" else "mmusculus"

    annotations = sc.queries.biomart_annotations(
        org=org,
        attrs=['ensembl_gene_id', 'external_gene_name'],
        use_cache=True,
    )
    ensembl_to_gene_name = dict(zip(annotations['ensembl_gene_id'], annotations['external_gene_name']))

    # Identifiers missing from the annotation table come out of the lookup as NaN.
    st.adata.var['gene_name'] = st.adata.var_names.map(ensembl_to_gene_name, na_action=None)
    gene_names = st.adata.var['gene_name']

    if filter_na:
        # Build the mask as a plain array so the column subset below is
        # positional and does not depend on the renamed index.
        valid_genes = gene_names.notna().to_numpy()
        # The empty-string placeholders only exist until the subset drops them.
        st.adata.var_names = gene_names.fillna('')
        st.adata = st.adata[:, valid_genes]
    else:
        # Nothing to drop: keep every gene and fall back to the original
        # identifier wherever no gene name was found.
        st.adata.var['gene_name'] = gene_names.where(gene_names.notna(), st.adata.var_names)

    return st


def save_spatial_plot(adata: sc.AnnData, save_path: str, name: str='', key='total_counts', pl_kwargs={}):
"""Save the spatial plot from that sc.AnnData
Expand Down
5 changes: 3 additions & 2 deletions src/hest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .utils import tiff_save, find_pixel_size_from_spot_coords, write_10X_h5, get_k_genes, SpotPacking
from .autoalign import autoalign_visium
from .readers import *
from .HESTData import HESTData, read_HESTData, load_hest, iter_hest
from .HESTData import HESTData, read_HESTData, load_hest, iter_hest, ensembleID_to_gene
from .segmentation.cell_segmenters import segment_cellvit

__all__ = [
Expand All @@ -20,5 +20,6 @@
'autoalign_visium',
'write_10X_h5',
'HESTData',
'segment_cellvit'
'segment_cellvit',
'ensembleID_to_gene'
]
7 changes: 7 additions & 0 deletions tests/hest_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from hest.autoalign import autoalign_visium
from hest.readers import VisiumReader
from hest.HESTData import ensembleID_to_gene
from hest.utils import load_image


Expand Down Expand Up @@ -131,6 +132,12 @@ def setUpClass(self):
else:
self.sts = hest.load_hest('hest_data', id_list)


def test_conversion_ensembleID(self):
    """Check the default (non-filtering) Ensembl ID to gene name conversion.

    With the default arguments no gene is dropped, and every gene receives a
    non-missing entry in the ``gene_name`` column of ``adata.var``.
    """
    for idx, st in enumerate(self.sts):
        with self.subTest(st_object=idx):
            n_genes = st.adata.n_vars

            converted = ensembleID_to_gene(st)

            self.assertIn('gene_name', converted.adata.var.columns)
            # filter_na defaults to False, so the gene count must be unchanged.
            self.assertEqual(converted.adata.n_vars, n_genes)
            # Unmatched genes fall back to their original identifier.
            self.assertFalse(converted.adata.var['gene_name'].isna().any())


def test_tissue_seg(self):
for idx, st in enumerate(self.sts):
Expand Down

0 comments on commit 2418cec

Please sign in to comment.