Skip to content

Commit

Permalink
feat: Integrate geopandas for reading qupath annotations
Browse files Browse the repository at this point in the history
qupath_utils:
-  pyproject: include geopandas as new dependency
- data_utils: new function to read json with geopandas, reformat docs of other functions
- qupath_utils: ensure contoursToPolygons returns valid polygons, modify read_qupath_annotations to read annotations with geopandas, enable saving as feature collections in export_polygons_to_qupath, and use built-in function from shapely to create box in patchesToPolygons
  • Loading branch information
loic-lb committed Jul 4, 2024
1 parent 04bd6f5 commit e14966c
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 61 deletions.
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ umap-learn = {extras = ["plot"], version = "^0.5.5"}
cellpose = {version = "^3.0.7", optional = true}
dask = {extras = ["dataframe"], version = "^2024.3.1", optional = true}
ipywidgets = { version = "^8.1.2", optional = true }
geopandas = {version = "^1.0.1", optional = true}

[tool.poetry.group.dev.dependencies]
black = "^22.8.0"
Expand All @@ -52,7 +53,8 @@ pyment = "^0.3.3"
pytest-subtests = "^0.12.1"

[tool.poetry.extras]
emb = ["torch", "torchvision", "tqdm", "transformers", "timm", "scikit-learn", "huggingface-hub", "umap-learn", "dask"]
emb = ["torch", "torchvision", "tqdm", "transformers", "timm", "scikit-learn",
"huggingface-hub", "umap-learn", "dask", "geopandas"]
seg = ["torch", "torchvision", "cellpose", "ipywidgets"]

[build-system]
Expand Down
74 changes: 53 additions & 21 deletions src/prismtoolbox/utils/data_utils.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,90 @@
from __future__ import annotations

import pickle
import json
import h5py
import geopandas as gpd
import numpy as np
from typing import Tuple, Any


def save_obj_with_pickle(obj: object, file_path: str) -> None:
    """Save an object to a file using pickle.

    Args:
        obj: A picklable object.
        file_path: The path to the file.
    """
    with open(file_path, "wb") as f:
        pickle.dump(obj, f)


def save_obj_with_json(obj: object, file_path: str) -> None:
    """Save an object to a file using json.

    Args:
        obj: A json-serializable object.
        file_path: The path to the file.
    """
    with open(file_path, "w") as f:
        json.dump(obj, f)


def load_obj_with_pickle(file_path: str) -> Any:
    """Load an object from a file using pickle.

    Args:
        file_path: The path to the pickle file.

    Returns:
        A picklable object from the file.
    """
    with open(file_path, "rb") as f:
        return pickle.load(f)


def load_obj_with_json(file_path: str) -> Any:
    """Load an object from a file using json.

    Args:
        file_path: The path to the json file.

    Returns:
        A json object from the file.
    """
    with open(file_path, "r") as f:
        return json.load(f)


def read_h5_file(file_path: str, key: str) -> Tuple[np.ndarray, dict]:
    """Read a dataset and its attributes from a h5 file.

    Args:
        file_path: The path to the h5 file.
        key: The key to select the dataset in the h5 file.

    Returns:
        A tuple ``(dataset, attrs)`` where ``dataset`` is the data stored
        under ``key`` and ``attrs`` is a dict of the dataset's HDF5
        attributes.
    """
    with h5py.File(file_path, "r") as f:
        # `[()]` reads the whole dataset into memory as a numpy array.
        # Renamed from `object` to avoid shadowing the builtin.
        dataset = f[key][()]
        attrs = dict(f[key].attrs)
    return dataset, attrs

def read_json_with_geopandas(
    file_path: str, offset: tuple[int, int] = (0, 0)
) -> gpd.GeoDataFrame:
    """Read a geojson-like json file into a GeoDataFrame.

    Args:
        file_path: The path to a json file containing a feature collection.
        offset: An (x, y) offset to apply to every geometry.

    Returns:
        A GeoDataFrame object created from the json file, with geometries
        translated by ``offset`` and invalid geometries repaired.
    """
    data = load_obj_with_json(file_path)
    df = gpd.GeoDataFrame.from_features(data)
    # translate() returns a new GeoSeries; assign it back so the offset
    # actually takes effect (the previous call discarded the result).
    df.geometry = df.translate(xoff=offset[0], yoff=offset[1])
    # Repair invalid geometries with the usual buffer(0) trick. Use .all()
    # so the repair runs as soon as ANY geometry is invalid, and assign to
    # the geometry column only (not an all-columns slice).
    if not df.is_valid.all():
        df.loc[~df.is_valid, "geometry"] = df.loc[~df.is_valid, "geometry"].buffer(0)
    if "classification" in df.columns:
        # QuPath stores classification as a dict like {"name": ..., "color": ...};
        # keep only the class name.
        df["classification"] = df["classification"].apply(lambda x: x["name"])
    return df
62 changes: 23 additions & 39 deletions src/prismtoolbox/utils/qupath_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
import os
import uuid
import numpy as np
from shapely import MultiPolygon, Polygon
import geopandas
from shapely import MultiPolygon, Polygon, box
from shapely.geometry import mapping, shape
from shapely.affinity import translate
from shapely.ops import unary_union
from typing import Optional, Tuple, List, Union
from .data_utils import load_obj_with_json, save_obj_with_json
from .data_utils import load_obj_with_json, save_obj_with_json, read_json_with_geopandas


def contoursToPolygons(
Expand All @@ -30,6 +31,8 @@ def contoursToPolygons(
else:
result.append(poly)
polygons = MultiPolygon(result)
if not polygons.is_valid:
polygons = polygons.buffer(0)
if merge:
polygons = unary_union(polygons)
return polygons
Expand All @@ -46,33 +49,21 @@ def PolygonsToContours(polygons: MultiPolygon):
for poly in polygons.geoms
]


def read_qupath_annotations(
    path: str,
    offset: Tuple[int, int] = (0, 0),
    class_name: str = "annotation",
):
    """Reads pathologist annotations from a .geojson file.

    Args:
        path: The path to the .geojson file.
        offset: An (x, y) offset to add to each coordinate.
        class_name: The class of annotations to retrieve, matched against
            the "classification" column when present, otherwise against
            "objectType".

    Returns:
        A MultiPolygon with the selected annotations, translated by
        ``offset`` (may degrade to a Polygon if a validity repair merges
        the geometries).
    """
    # Read without an offset here: the translation is applied exactly once
    # below (passing the offset through as well would apply it twice).
    df = read_json_with_geopandas(path)
    column_to_select = (
        "classification" if "classification" in df.columns else "objectType"
    )
    selected = df.loc[df[column_to_select] == class_name, "geometry"]
    polygons = []
    for geom in selected:
        # Flatten MultiPolygons so the MultiPolygon constructor accepts
        # the collection (it only takes Polygon components).
        if isinstance(geom, MultiPolygon):
            polygons.extend(geom.geoms)
        else:
            polygons.append(geom)
    polygons = MultiPolygon(polygons)
    polygons = translate(polygons, xoff=offset[0], yoff=offset[1])
    if not polygons.is_valid:
        # buffer(0) is the standard shapely trick to repair invalid geometry.
        polygons = polygons.buffer(0)
    return polygons


def convert_rgb_to_java_int_signed(rgb: Tuple[int, int, int]) -> int:
"""Converts RGB tuple to Java signed integer.
Expand All @@ -94,6 +85,7 @@ def export_polygons_to_qupath(
label: Optional[str] = None,
color: Optional[Tuple[int, int, int]] = None,
append_to_existing_file: Optional[bool] = False,
as_feature_collection: Optional[bool] = False,
):
"""Exports polygons to a .json or .geojson file.
Expand All @@ -104,10 +96,10 @@ def export_polygons_to_qupath(
:param label: optional label of the polygons
:param color: optional color of the polygons
:param append_to_existing_file: optional boolean to append the polygons to an existing file
:param as_feature_collection: optional boolean to save the polygons as a FeatureCollection
"""
if isinstance(polygons, Polygon):
polygons = MultiPolygon([polygons])
# features = {"type": "FeatureCollection", "features": []}
features = []
properties = {"objectType": object_type}
if label is not None:
Expand All @@ -117,24 +109,25 @@ def export_polygons_to_qupath(
}
polygons = translate(polygons, xoff=offset[0], yoff=offset[1])
for poly in polygons.geoms:
# features.append({"type": "Feature", "geometry": mapping(poly)})
features.append(
{
features.append(
{
"type": "Feature",
"id": str(uuid.uuid4()),
"geometry": mapping(poly),
"properties": properties,
}
)
})
features = {"type": "FeatureCollection", "features": features} if as_feature_collection else features
if os.path.exists(path) and append_to_existing_file:
previous_features = load_obj_with_json(path)
if len(previous_features) == 0:
logging.warning(
"The .geojson file does not contain any features, creating new file."
)
else:
previous_features.extend(features)
# previous_features["features"].extend(features["features"])
if as_feature_collection:
previous_features["features"].extend(features["features"])
else:
previous_features.extend(features)
features = previous_features
save_obj_with_json(features, path)

Expand Down Expand Up @@ -172,7 +165,7 @@ def patchesToPolygons(
) -> Union[Polygon, MultiPolygon]:
"""Converts patches to shapely polygons.
:param patches: patches to convert to shapely polygons
:param patches: Top left point coordinates of the patches to convert to shapely polygons
:param patch_size: size of the patches
:param offset: optional offset to add to each coordinate in the arrays
:param merge: optional boolean to merge the polygons
Expand All @@ -182,16 +175,7 @@ def patchesToPolygons(
ref_patch_size = patch_size * patch_downsample
for patch in patches:
x, y = patch
polygons.append(
Polygon(
[
(x, y),
(x + ref_patch_size, y),
(x + ref_patch_size, y + ref_patch_size),
(x, y + ref_patch_size),
]
)
)
polygons.append(box(x, y, x + ref_patch_size, y + ref_patch_size, ccw=False))
polygons = MultiPolygon(polygons)
if merge:
polygons = unary_union(polygons)
Expand Down

0 comments on commit e14966c

Please sign in to comment.