xenium supports aligned images; renamed cells_as_shapes

quentinblampey · Jan 22, 2024 · 596e403 · 596e403
1 parent 48d7d6f
commit 596e403
Show file tree

Hide file tree

Showing 4 changed files with 191 additions and 16 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -13,12 +13,14 @@ and this project adheres to [Semantic Versioning][].
 ### Added
 
 -   (MCMICRO) support for TMAs (such as the data of exemplar-002)
+-   (Xenium) support for post-xenium aligned images (IF, HE)
 
 ### Fixed
 
 -   (MERSCOPE) don't try to load unexisting elements #87
 -   (Visium) fixed axes ordering
 -   (Xenium) fixed index (fail on write)
+-   (Xenium) renamed cells_as_shapes to cells_as_circles; set default to True
 
 ## [0.0.9] - 2023-11-06
 

diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py
@@ -92,10 +92,18 @@ class XeniumKeys(ModeEnum):
     CELL_AREA = "cell_area"
     CELL_NUCLEUS_AREA = "nucleus_area"
 
-    # morphology iamges
+    # morphology images
     MORPHOLOGY_MIP_FILE = "morphology_mip.ome.tif"
     MORPHOLOGY_FOCUS_FILE = "morphology_focus.ome.tif"
 
+    # post-xenium images
+    ALIGNED_IF_IMAGE_SUFFIX = "if_image.ome.tif"
+    ALIGNED_HE_IMAGE_SUFFIX = "he_image.ome.tif"
+
+    # image alignment suffix
+    ALIGNMENT_FILE_SUFFIX_TO_REMOVE = ".ome.tif"
+    ALIGNMENT_FILE_SUFFIX_TO_ADD = "alignment.csv"
+
 
 @unique
 class VisiumKeys(ModeEnum):

diff --git a/src/spatialdata_io/_utils.py b/src/spatialdata_io/_utils.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+import functools
+import warnings
+from typing import Any, Callable, TypeVar
+
+RT = TypeVar("RT")
+
+
+# these two functions should be removed and imported from spatialdata._utils once the multi_table branch, which
+# introduces them, is merged
+def deprecation_alias(**aliases: str) -> Callable[[Callable[..., RT]], Callable[..., RT]]:
+    """
+    Decorate a function to warn user of use of arguments set for deprecation.
+
+    Parameters
+    ----------
+    aliases
+        Deprecation argument aliases to be mapped to the new arguments.
+
+    Returns
+    -------
+    A decorator that can be used to mark an argument for deprecation and substituting it with the new argument.
+
+    Raises
+    ------
+    TypeError
+        If the provided aliases are not of string type.
+
+    Example
+    -------
+    Assuming we have an argument 'table' set for deprecation and we want to warn the user and substitute with 'tables':
+
+    .. code-block:: python
+
+        @deprecation_alias(table="tables")
+        def my_function(tables):
+            pass
+    """
+
+    def deprecation_decorator(f: Callable[..., RT]) -> Callable[..., RT]:
+        @functools.wraps(f)
+        def wrapper(*args: Any, **kwargs: Any) -> RT:
+            class_name = f.__qualname__
+            rename_kwargs(f.__name__, kwargs, aliases, class_name)
+            return f(*args, **kwargs)
+
+        return wrapper
+
+    return deprecation_decorator
+
+
+def rename_kwargs(func_name: str, kwargs: dict[str, Any], aliases: dict[str, str], class_name: None | str) -> None:
+    """Rename function arguments set for deprecation and gives warning in case of usage of these arguments."""
+    for alias, new in aliases.items():
+        if alias in kwargs:
+            class_name = class_name + "." if class_name else ""
+            if new in kwargs:
+                raise TypeError(
+                    f"{class_name}{func_name} received both {alias} and {new} as arguments!"
+                    f" {alias} is being deprecated, please only use {new} instead."
+                )
+            warnings.warn(
+                message=(
+                    f"`{alias}` is being deprecated as an argument to `{class_name}{func_name}` in SpatialData "
+                    f"version 0.1, switch to `{new}` instead."
+                ),
+                category=DeprecationWarning,
+                stacklevel=3,
+            )
+            kwargs[new] = kwargs.pop(alias)
diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py
@@ -21,20 +21,22 @@
 from spatialdata import SpatialData
 from spatialdata._types import ArrayLike
 from spatialdata.models import Image2DModel, PointsModel, ShapesModel, TableModel
-from spatialdata.transformations.transformations import Identity, Scale
+from spatialdata.transformations.transformations import Affine, Identity, Scale
 
 from spatialdata_io._constants._constants import XeniumKeys
 from spatialdata_io._docs import inject_docs
+from spatialdata_io._utils import deprecation_alias
 from spatialdata_io.readers._utils._read_10x_h5 import _read_10x_h5
 
 __all__ = ["xenium"]
 
 
+@deprecation_alias(cells_as_shapes="cells_as_circles")
 @inject_docs(xx=XeniumKeys)
 def xenium(
     path: str | Path,
     n_jobs: int = 1,
-    cells_as_shapes: bool = False,
+    cells_as_circles: bool = False,
     nucleus_boundaries: bool = True,
     transcripts: bool = True,
     morphology_mip: bool = True,
@@ -66,8 +68,8 @@ def xenium(
         Path to the dataset.
     n_jobs
         Number of jobs to use for parallel processing.
-    cells_as_shapes
-        Whether to read cells also as shapes. Useful for visualization.
+    cells_as_circles
+        Whether to read cells also as circles. Useful for performant visualization.
     nucleus_boundaries
         Whether to read nucleus boundaries.
     transcripts
@@ -100,9 +102,9 @@ def xenium(
     with open(path / XeniumKeys.XENIUM_SPECS) as f:
         specs = json.load(f)
 
-    specs["region"] = "cell_circles" if cells_as_shapes else "cell_boundaries"
-    return_values = _get_tables_and_circles(path, cells_as_shapes, specs)
-    if cells_as_shapes:
+    specs["region"] = "cell_circles" if cells_as_circles else "cell_boundaries"
+    return_values = _get_tables_and_circles(path, cells_as_circles, specs)
+    if cells_as_circles:
         table, circles = return_values
     else:
         table = return_values
@@ -134,21 +136,27 @@ def xenium(
         images["morphology_mip"] = _get_images(
             path,
             XeniumKeys.MORPHOLOGY_MIP_FILE,
-            specs,
             imread_kwargs,
             image_models_kwargs,
         )
     if morphology_focus:
         images["morphology_focus"] = _get_images(
             path,
             XeniumKeys.MORPHOLOGY_FOCUS_FILE,
-            specs,
             imread_kwargs,
             image_models_kwargs,
         )
-    if cells_as_shapes:
-        return SpatialData(images=images, shapes=polygons | {specs["region"]: circles}, points=points, table=table)
-    return SpatialData(images=images, shapes=polygons, points=points, table=table)
+    if cells_as_circles:
+        sdata = SpatialData(images=images, shapes=polygons | {specs["region"]: circles}, points=points, table=table)
+    else:
+        sdata = SpatialData(images=images, shapes=polygons, points=points, table=table)
+
+    # find and add additional aligned images
+    aligned_images = _add_aligned_images(path, imread_kwargs, image_models_kwargs)
+    for key, value in aligned_images.items():
+        sdata.images[key] = value
+
+    return sdata
 
 
 def _get_polygons(
@@ -189,7 +197,7 @@ def _get_points(path: Path, specs: dict[str, Any]) -> Table:
 
 
 def _get_tables_and_circles(
-    path: Path, cells_as_shapes: bool, specs: dict[str, Any]
+    path: Path, cells_as_circles: bool, specs: dict[str, Any]
 ) -> AnnData | tuple[AnnData, AnnData]:
     adata = _read_10x_h5(path / XeniumKeys.CELL_FEATURE_MATRIX_FILE)
     metadata = pd.read_parquet(path / XeniumKeys.CELL_METADATA_FILE)
@@ -203,7 +211,7 @@ def _get_tables_and_circles(
     if isinstance(adata.obs[XeniumKeys.CELL_ID].iloc[0], bytes):
         adata.obs[XeniumKeys.CELL_ID] = adata.obs[XeniumKeys.CELL_ID].apply(lambda x: x.decode("utf-8"))
     table = TableModel.parse(adata, region=specs["region"], region_key="region", instance_key=str(XeniumKeys.CELL_ID))
-    if cells_as_shapes:
+    if cells_as_circles:
         transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y"))
         radii = np.sqrt(adata.obs[XeniumKeys.CELL_NUCLEUS_AREA].to_numpy() / np.pi)
         circles = ShapesModel.parse(
@@ -220,11 +228,97 @@ def _get_tables_and_circles(
 def _get_images(
     path: Path,
     file: str,
-    specs: dict[str, Any],
     imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
     image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
 ) -> SpatialImage | MultiscaleSpatialImage:
     image = imread(path / file, **imread_kwargs)
     return Image2DModel.parse(
         image, transformations={"global": Identity()}, dims=("c", "y", "x"), **image_models_kwargs
     )
+
+
+def _add_aligned_images(
+    path: Path,
+    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
+    image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
+) -> dict[str, MultiscaleSpatialImage]:
+    """Discover and parse aligned images."""
+    images = {}
+    ome_tif_files = list(path.glob("*.ome.tif"))
+    csv_files = list(path.glob("*.csv"))
+    for file in ome_tif_files:
+        element_name = None
+        for suffix in [XeniumKeys.ALIGNED_HE_IMAGE_SUFFIX, XeniumKeys.ALIGNED_IF_IMAGE_SUFFIX]:
+            if file.name.endswith(suffix):
+                element_name = suffix.replace(XeniumKeys.ALIGNMENT_FILE_SUFFIX_TO_REMOVE, "")
+                break
+        if element_name is not None:
+            # check if an alignment file exists
+            expected_filename = file.name.replace(
+                XeniumKeys.ALIGNMENT_FILE_SUFFIX_TO_REMOVE, XeniumKeys.ALIGNMENT_FILE_SUFFIX_TO_ADD
+            )
+            alignment_files = [f for f in csv_files if f.name == expected_filename]
+            assert len(alignment_files) <= 1, f"Found more than one alignment file for {file.name}."
+            alignment_file = alignment_files[0] if alignment_files else None
+
+            # parse the image
+            image = xenium_aligned_image(file, alignment_file, imread_kwargs, image_models_kwargs)
+            images[element_name] = image
+    return images
+
+
+def xenium_aligned_image(
+    image_path: str | Path,
+    alignment_file: str | Path | None,
+    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
+    image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
+) -> MultiscaleSpatialImage:
+    """
+    Read an image aligned to a Xenium dataset, with an optional alignment file.
+
+    Parameters
+    ----------
+    image_path
+        Path to the image.
+    alignment_file
+        Path to the alignment file, if not passed it is assumed that the image is aligned.
+    image_models_kwargs
+        Keyword arguments to pass to the image models.
+
+    Returns
+    -------
+    The single-scale or multi-scale aligned image element.
+    """
+    image_path = Path(image_path)
+    assert image_path.exists(), f"File {image_path} does not exist."
+    image = imread(image_path, **imread_kwargs)
+
+    # Depending on the version of pipeline that was used, some images have shape (1, y, x, 3) and others (3, y, x)
+    # since y and x are always different from 1, let's differentiate from the two cases here, independently of the
+    # pipeline version.
+    # Note that a more robust approach is to look at the xml metadata in the ome.tif; we could use this in a future PR.
+    print(image.shape)
+    if len(image.shape) == 4:
+        assert image.shape[0] == 1
+        assert image.shape[-1] == 3
+        image = image.squeeze(0)
+        dims = ("y", "x", "c")
+    else:
+        assert len(image.shape) == 3
+        assert image.shape[0] == 3
+        dims = ("c", "y", "x")
+
+    if alignment_file is None:
+        transformation = Identity()
+    else:
+        alignment_file = Path(alignment_file)
+        assert alignment_file.exists(), f"File {alignment_file} does not exist."
+        alignment = pd.read_csv(alignment_file, header=None).values
+        transformation = Affine(alignment, input_axes=("x", "y"), output_axes=("x", "y"))
+
+    return Image2DModel.parse(
+        image,
+        dims=dims,
+        transformations={"global": transformation},
+        **image_models_kwargs,
+    )