diff --git a/lib/galaxy/datatypes/images.py b/lib/galaxy/datatypes/images.py index c44a294f07f2..ea230866711b 100644 --- a/lib/galaxy/datatypes/images.py +++ b/lib/galaxy/datatypes/images.py @@ -5,12 +5,25 @@ import base64 import json import logging -from typing import Optional +import struct +from typing import ( + Any, + Dict, + List, + Optional, + Tuple, +) import mrcfile import numpy as np import tifffile +try: + import PIL + import PIL.Image +except ImportError: + PIL = None # type: ignore[assignment, unused-ignore] + from galaxy.datatypes.binary import Binary from galaxy.datatypes.metadata import ( FileParameter, @@ -50,6 +63,70 @@ class Image(data.Data): edam_format = "format_3547" file_ext = "" + MetadataElement( + name="axes", + desc="Axes of the image data", + readonly=True, + visible=True, + optional=True, + ) + + MetadataElement( + name="dtype", + desc="Data type of the image pixels or voxels", + readonly=True, + visible=True, + optional=True, + ) + + MetadataElement( + name="num_unique_values", + desc="Number of unique values in the image data (e.g., should be 2 for binary images)", + readonly=True, + visible=True, + optional=True, + ) + + MetadataElement( + name="width", + desc="Width of the image (in pixels)", + readonly=True, + visible=True, + optional=True, + ) + + MetadataElement( + name="height", + desc="Height of the image (in pixels)", + readonly=True, + visible=True, + optional=True, + ) + + MetadataElement( + name="channels", + desc="Number of channels of the image", + readonly=True, + visible=True, + optional=True, + ) + + MetadataElement( + name="depth", + desc="Depth of the image (number of slices)", + readonly=True, + visible=True, + optional=True, + ) + + MetadataElement( + name="frames", + desc="Number of frames in the image sequence (number of time steps)", + readonly=True, + visible=True, + optional=True, + ) + def __init__(self, **kwd): super().__init__(**kwd) self.image_formats = [self.file_ext.upper()] @@ -73,6 +150,40 @@ def 
handle_dataset_as_image(self, hda: DatasetProtocol) -> str:
             base64_image_data = base64.b64encode(f.read()).decode("utf-8")
         return f"![{name}](data:image/{self.file_ext};base64,{base64_image_data})"
 
+    def set_meta(
+        self, dataset: DatasetProtocol, overwrite: bool = True, metadata_tmp_files_dir: Optional[str] = None, **kwd
+    ) -> None:
+        """
+        Try to populate the metadata of the image using a generic image loading library (pillow), if available.
+
+        If an image has two axes, they are assumed to be ``YX``. If an image has three axes, they are assumed to be ``YXC``.
+        """
+        if PIL is not None:
+            try:
+                with PIL.Image.open(dataset.get_file_name()) as im:
+                    # Pillow reports the size of the image as `(width, height)`
+                    dataset.metadata.width = im.size[0]
+                    dataset.metadata.height = im.size[1]
+                    dataset.metadata.depth = 0
+                    dataset.metadata.frames = getattr(im, "n_frames", 0)
+                    # NOTE(review): `histogram()` concatenates all bands, so values are counted per band - confirm
+                    dataset.metadata.num_unique_values = sum(val > 0 for val in im.histogram())
+
+                    # Peek into a section of at most 2x2 pixels, the crop box is `(left, upper, right, lower)`
+                    im_peek_arr = np.array(im.crop((0, 0, min(2, im.size[0]), min(2, im.size[1]))))
+
+                    # Determine the remaining metadata values
+                    dataset.metadata.dtype = str(im_peek_arr.dtype)
+                    if im_peek_arr.ndim == 2:
+                        dataset.metadata.axes = "YX"
+                        dataset.metadata.channels = 0
+                    elif im_peek_arr.ndim == 3:
+                        dataset.metadata.axes = "YXC"
+                        dataset.metadata.channels = im_peek_arr.shape[2]
+
+            except PIL.UnidentifiedImageError:
+                pass
+
 
 class Jpg(Image):
     edam_format = "format_3579"
@@ -104,17 +215,111 @@ class Tiff(Image):
     def set_meta(
         self, dataset: DatasetProtocol, overwrite: bool = True, metadata_tmp_files_dir: Optional[str] = None, **kwd
     ) -> None:
+        """
+        Populate the metadata of the TIFF image using the tifffile library.
+
+        One value is determined per series of the TIFF file for each metadata field. A field is populated as a plain
+        value if there is just one series, and as a list otherwise. ``None`` is recorded as the number of unique
+        values of a series whose image data cannot be read. The page offsets are written to the ``offsets`` metadata
+        file, if the metadata has such an element. The exception types caught below are silently ignored.
+        """
         spec_key = "offsets"
-        offsets_file = dataset.metadata.offsets
-        if not offsets_file:
-            offsets_file = dataset.metadata.spec[spec_key].param.new_file(
-                dataset=dataset, metadata_tmp_files_dir=metadata_tmp_files_dir
-            )
-        with tifffile.TiffFile(dataset.get_file_name()) as tif:
-            offsets = [page.offset for page in tif.pages]
-        with open(offsets_file.get_file_name(), "w") as f:
-            json.dump(offsets, f)
-        dataset.metadata.offsets = offsets_file
+        if hasattr(dataset.metadata, spec_key):
+            offsets_file = dataset.metadata.offsets
+            if not offsets_file:
+                offsets_file = dataset.metadata.spec[spec_key].param.new_file(
+                    dataset=dataset, metadata_tmp_files_dir=metadata_tmp_files_dir
+                )
+        else:
+            offsets_file = None
+        try:
+            with tifffile.TiffFile(dataset.get_file_name()) as tif:
+                offsets = [page.offset for page in tif.pages]
+
+                # Aggregate a list of values for each metadata field (one value for each series of the TIFF file)
+                metadata: Dict[str, List[Any]] = {
+                    key: []
+                    for key in [
+                        "axes",
+                        "dtype",
+                        "width",
+                        "height",
+                        "channels",
+                        "depth",
+                        "frames",
+                        "num_unique_values",
+                    ]
+                }
+                for series in tif.series:
+
+                    # Determine the metadata values that should be generally available
+                    metadata["axes"].append(series.axes.upper())
+                    metadata["dtype"].append(str(series.dtype))
+
+                    # Treat the samples axis `S` as the channels axis `C` when looking up the axis sizes
+                    axes = metadata["axes"][-1].replace("S", "C")
+                    metadata["width"].append(Tiff._get_axis_size(series.shape, axes, "X"))
+                    metadata["height"].append(Tiff._get_axis_size(series.shape, axes, "Y"))
+                    metadata["channels"].append(Tiff._get_axis_size(series.shape, axes, "C"))
+                    metadata["depth"].append(Tiff._get_axis_size(series.shape, axes, "Z"))
+                    metadata["frames"].append(Tiff._get_axis_size(series.shape, axes, "T"))
+
+                    # Determine the metadata values that require reading the image data
+                    try:
+                        num_unique_values: Optional[int] = len(np.unique(series.asarray()))
+                    except ValueError:  # Occurs if the compression of the TIFF file is unsupported
+                        # Record a placeholder, so that the list stays aligned with the lists of the other fields
+                        num_unique_values = None
+                    metadata["num_unique_values"].append(num_unique_values)
+
+            # Populate the metadata fields based on the values determined above
+            for key, values in metadata.items():
+
+                # Skip the fields for which no value could be determined at all (this includes empty lists)
+                if all(value is None for value in values):
+                    continue
+
+                # Populate as plain value, if there is just one value, and as a list otherwise
+                setattr(dataset.metadata, key, values[0] if len(values) == 1 else values)
+
+            # Populate the "offsets" file and metadata field
+            if offsets_file:
+                with open(offsets_file.get_file_name(), "w") as f:
+                    json.dump(offsets, f)
+                dataset.metadata.offsets = offsets_file
+
+        # Catch errors from deep inside the tifffile library (populating the metadata is a best-effort operation)
+        except (
+            AttributeError,
+            IndexError,
+            KeyError,
+            OSError,
+            RuntimeError,
+            struct.error,
+            tifffile.OmeXmlError,
+            tifffile.TiffFileError,
+            TypeError,
+            ValueError,
+        ):
+            pass
+
+    @staticmethod
+    def _get_axis_size(shape: Tuple[int, ...], axes: str, axis: str) -> int:
+        """
+        Return the entry of `shape` at the position of `axis` within `axes`, or 0 if `axes` does not contain `axis`.
+        """
+        idx = axes.find(axis)
+        return shape[idx] if idx >= 0 else 0
+
+    def sniff(self, filename: str) -> bool:
+        """
+        Determine whether the tifffile library can open the file, and return ``False`` (instead of raising) if not.
+        """
+        try:
+            with tifffile.TiffFile(filename):
+                return True
+        except tifffile.TiffFileError:
+            return False
 
 
 class OMETiff(Tiff):
diff --git a/lib/galaxy/datatypes/test/im1_uint8.png b/lib/galaxy/datatypes/test/im1_uint8.png
new file mode 120000
index 000000000000..14bf0f2f97ae
--- /dev/null
+++ b/lib/galaxy/datatypes/test/im1_uint8.png
@@ -0,0 +1 @@
+../../../../test-data/im1_uint8.png
\ No newline at end of file
diff --git a/lib/galaxy/datatypes/test/im1_uint8.tif b/lib/galaxy/datatypes/test/im1_uint8.tif
new file mode 120000
index 000000000000..4ad037a48087
--- /dev/null
+++ b/lib/galaxy/datatypes/test/im1_uint8.tif
@@ -0,0 +1 @@
+../../../../test-data/im1_uint8.tif
\ No newline at end of file
diff --git a/lib/galaxy/datatypes/test/im2_a.png b/lib/galaxy/datatypes/test/im2_a.png
new file mode 120000
index 000000000000..ac8129a75e59
--- /dev/null
+++ b/lib/galaxy/datatypes/test/im2_a.png
@@ -0,0 +1 @@
+../../../../test-data/im2_a.png
\ No newline at end of file
diff --git a/lib/galaxy/datatypes/test/im2_b.png b/lib/galaxy/datatypes/test/im2_b.png
new file mode 120000
index 000000000000..4658fdfaba03
---
/dev/null +++ b/lib/galaxy/datatypes/test/im2_b.png @@ -0,0 +1 @@ +../../../../test-data/im2_b.png \ No newline at end of file diff --git a/lib/galaxy/datatypes/test/im3_a.png b/lib/galaxy/datatypes/test/im3_a.png new file mode 120000 index 000000000000..8f8c28572e7c --- /dev/null +++ b/lib/galaxy/datatypes/test/im3_a.png @@ -0,0 +1 @@ +../../../../test-data/im3_a.png \ No newline at end of file diff --git a/lib/galaxy/datatypes/test/im3_b.tif b/lib/galaxy/datatypes/test/im3_b.tif new file mode 120000 index 000000000000..ae96a05142f6 --- /dev/null +++ b/lib/galaxy/datatypes/test/im3_b.tif @@ -0,0 +1 @@ +../../../../test-data/im3_b.tif \ No newline at end of file diff --git a/lib/galaxy/datatypes/test/im4_float.tif b/lib/galaxy/datatypes/test/im4_float.tif new file mode 120000 index 000000000000..6f201e845d8e --- /dev/null +++ b/lib/galaxy/datatypes/test/im4_float.tif @@ -0,0 +1 @@ +../../../../test-data/im4_float.tif \ No newline at end of file diff --git a/lib/galaxy/datatypes/test/im5_uint8.tif b/lib/galaxy/datatypes/test/im5_uint8.tif new file mode 120000 index 000000000000..ec7f5f0220a8 --- /dev/null +++ b/lib/galaxy/datatypes/test/im5_uint8.tif @@ -0,0 +1 @@ +../../../../test-data/im5_uint8.tif \ No newline at end of file diff --git a/lib/galaxy/datatypes/test/im6_uint8.tif b/lib/galaxy/datatypes/test/im6_uint8.tif new file mode 120000 index 000000000000..a2723a76d805 --- /dev/null +++ b/lib/galaxy/datatypes/test/im6_uint8.tif @@ -0,0 +1 @@ +../../../../test-data/im6_uint8.tif \ No newline at end of file diff --git a/lib/galaxy/datatypes/test/im7_uint8.tif b/lib/galaxy/datatypes/test/im7_uint8.tif new file mode 120000 index 000000000000..fd4533a3570b --- /dev/null +++ b/lib/galaxy/datatypes/test/im7_uint8.tif @@ -0,0 +1 @@ +../../../../test-data/im7_uint8.tif \ No newline at end of file diff --git a/lib/galaxy/datatypes/test/im8_uint16.tif b/lib/galaxy/datatypes/test/im8_uint16.tif new file mode 120000 index 000000000000..b61c52c6d617 --- /dev/null +++ 
b/lib/galaxy/datatypes/test/im8_uint16.tif
@@ -0,0 +1 @@
+../../../../test-data/im8_uint16.tif
\ No newline at end of file
diff --git a/lib/galaxy/datatypes/test/im9_multipage.tif b/lib/galaxy/datatypes/test/im9_multipage.tif
new file mode 120000
index 000000000000..64f01346fded
--- /dev/null
+++ b/lib/galaxy/datatypes/test/im9_multipage.tif
@@ -0,0 +1 @@
+../../../../test-data/im9_multipage.tif
\ No newline at end of file
diff --git a/lib/galaxy/datatypes/test/im_empty.tif b/lib/galaxy/datatypes/test/im_empty.tif
new file mode 120000
index 000000000000..6849781b4273
--- /dev/null
+++ b/lib/galaxy/datatypes/test/im_empty.tif
@@ -0,0 +1 @@
+../../../../test-data/im_empty.tif
\ No newline at end of file
diff --git a/test-data/im9_multipage.tif b/test-data/im9_multipage.tif
new file mode 100644
index 000000000000..79df2fd85d0a
Binary files /dev/null and b/test-data/im9_multipage.tif differ
diff --git a/test-data/im_empty.tif b/test-data/im_empty.tif
new file mode 100644
index 000000000000..997c69fe8f92
Binary files /dev/null and b/test-data/im_empty.tif differ
diff --git a/test/unit/data/datatypes/test_images.py b/test/unit/data/datatypes/test_images.py
new file mode 100644
index 000000000000..461bad7373df
--- /dev/null
+++ b/test/unit/data/datatypes/test_images.py
@@ -0,0 +1,138 @@
+from typing import (
+    Any,
+    Type,
+)
+
+from galaxy.datatypes.images import (
+    Image,
+    Pdf,
+    Tiff,
+)
+from .util import (
+    get_dataset,
+    MockDatasetDataset,
+)
+
+# Test decorator: runs `image_cls().set_meta` on the dataset of `input_filename`, passes its metadata to the test
+
+
+def __test(image_cls: Type[Image], input_filename: str):
+
+    def decorator(test_impl):
+
+        def test():
+            image = image_cls()
+            with get_dataset(input_filename) as dataset:
+                dataset.dataset = MockDatasetDataset(dataset.get_file_name())
+                image.set_meta(dataset)
+                test_impl(dataset.metadata)
+
+        return test
+
+    return decorator
+
+
+# Test factory: creates a test which asserts that the metadata field `metadata_key` equals `expected_value`
+
+
+def __create_test(image_cls: Type[Image], input_filename: str, metadata_key: str, expected_value: Any):
+
+    @__test(image_cls, input_filename)
+    def test(metadata):
+        assert getattr(metadata, metadata_key) == expected_value
+
+    return test
+
+
+# Test utility: asserts that each of the image metadata fields is either missing or `None`
+
+
+def __assert_empty_metadata(metadata):
+    for key in (
+        "axes",
+        "dtype",
+        "num_unique_values",
+        "width",
+        "height",
+        "channels",
+        "depth",
+        "frames",
+    ):
+        assert getattr(metadata, key, None) is None
+
+
+# Tests with TIFF files (using the `Tiff` datatype)
+
+test_tiff_axes_yx = __create_test(Tiff, "im1_uint8.tif", "axes", "YX")
+test_tiff_axes_zcyx = __create_test(Tiff, "im6_uint8.tif", "axes", "ZCYX")
+test_tiff_dtype_uint8 = __create_test(Tiff, "im6_uint8.tif", "dtype", "uint8")
+test_tiff_dtype_uint16 = __create_test(Tiff, "im8_uint16.tif", "dtype", "uint16")
+test_tiff_dtype_float64 = __create_test(Tiff, "im4_float.tif", "dtype", "float64")
+test_tiff_num_unique_values_2 = __create_test(Tiff, "im3_b.tif", "num_unique_values", 2)
+test_tiff_num_unique_values_618 = __create_test(Tiff, "im4_float.tif", "num_unique_values", 618)
+test_tiff_width_16 = __create_test(Tiff, "im7_uint8.tif", "width", 16)  # axes: ZYX
+test_tiff_width_32 = __create_test(Tiff, "im3_b.tif", "width", 32)  # axes: YXS
+test_tiff_height_8 = __create_test(Tiff, "im7_uint8.tif", "height", 8)  # axes: ZYX
+test_tiff_height_32 = __create_test(Tiff, "im3_b.tif", "height", 32)  # axes: YXS
+test_tiff_channels_0 = __create_test(Tiff, "im1_uint8.tif", "channels", 0)
+test_tiff_channels_2 = __create_test(Tiff, "im5_uint8.tif", "channels", 2)  # axes: CYX
+test_tiff_channels_3 = __create_test(Tiff, "im3_b.tif", "channels", 3)  # axes: YXS
+test_tiff_depth_0 = __create_test(Tiff, "im1_uint8.tif", "depth", 0)  # axes: YX
+test_tiff_depth_25 = __create_test(Tiff, "im7_uint8.tif", "depth", 25)  # axes: ZYX
+test_tiff_frames_0 = __create_test(Tiff, "im1_uint8.tif", "frames", 0)  # axes: YX
+test_tiff_frames_5 = __create_test(Tiff, "im8_uint16.tif", "frames", 5)  # axes: TYX
+
+
+@__test(Tiff, "im_empty.tif")
+def test_tiff_empty(metadata):
+    __assert_empty_metadata(metadata)
+
+
+@__test(Tiff, "1.tiff")
+def test_tiff_unsupported_compression(metadata):
+    # If the compression of a TIFF is unsupported, the fields which do not require the image data should still be there
+    assert metadata.axes == "YX"
+    assert metadata.dtype == "bool"
+    assert metadata.width == 1728
+    assert metadata.height == 2376
+    assert metadata.channels == 0
+    assert metadata.depth == 0
+    assert metadata.frames == 0
+
+    # The number of unique values requires the image data, so this field should be missing
+    assert getattr(metadata, "num_unique_values", None) is None
+
+
+@__test(Tiff, "im9_multipage.tif")
+def test_tiff_multipage(metadata):
+    assert metadata.axes == ["YXS", "YX"]
+    assert metadata.dtype == ["uint8", "uint16"]
+    assert metadata.num_unique_values == [2, 255]
+    assert metadata.width == [32, 256]
+    assert metadata.height == [32, 256]
+    assert metadata.channels == [3, 0]
+    assert metadata.depth == [0, 0]
+    assert metadata.frames == [0, 0]
+
+
+# Tests with PNG files (using the generic `Image` datatype)
+
+test_png_axes_yx = __create_test(Image, "im1_uint8.png", "axes", "YX")
+test_png_axes_yxc = __create_test(Image, "im3_a.png", "axes", "YXC")
+test_png_dtype_uint8 = __create_test(Image, "im1_uint8.png", "dtype", "uint8")
+test_png_num_unique_values_1 = __create_test(Image, "im2_a.png", "num_unique_values", 1)
+test_png_num_unique_values_2 = __create_test(Image, "im2_b.png", "num_unique_values", 2)
+test_png_width_32 = __create_test(Image, "im2_b.png", "width", 32)
+test_png_height_32 = __create_test(Image, "im2_b.png", "height", 32)
+test_png_channels_0 = __create_test(Image, "im1_uint8.png", "channels", 0)
+test_png_channels_3 = __create_test(Image, "im3_a.png", "channels", 3)
+test_png_depth_0 = __create_test(Image, "im1_uint8.png", "depth", 0)
+test_png_frames_1 = __create_test(Image, "im1_uint8.png", "frames", 1)
+
+
+# Test with a file for which the metadata fields are expected to remain empty (a PDF, using the `Pdf` datatype)
+
+
+@__test(Pdf, "454Score.pdf")
+def test_unsupported_metadata(metadata):
+    __assert_empty_metadata(metadata)