From f34edbcf7450fd127fd35bd4dd4a06c62eb89ef9 Mon Sep 17 00:00:00 2001 From: Bradley Lowekamp Date: Wed, 8 Jun 2022 10:00:11 -0400 Subject: [PATCH 1/3] Fix horizontal line for Sphinx docs. --- rap_sitkcore/read_dcm_headers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rap_sitkcore/read_dcm_headers.py b/rap_sitkcore/read_dcm_headers.py index 2fab329..3551e7f 100644 --- a/rap_sitkcore/read_dcm_headers.py +++ b/rap_sitkcore/read_dcm_headers.py @@ -109,11 +109,11 @@ def _read_dcm_header_pydicom( should be streamed. If filepath_or_url points to a local file, this parameter is ignored. - **************************************************************************************** + ------- For this to work, the server must support the chunked transfer-encoding. If it doesn't, requests *should* treat this as a normal request and download the entire request contents before passing to pydicom dcmread. - **************************************************************************************** + -------- :returns: tuple of the following format: ( pydicom.dataset.FileDataset: the pydicom file dataset object, @@ -151,11 +151,11 @@ def read_dcm_header_pydicom( should be streamed. If filepath_or_url points to a local file, this parameter is ignored. - **************************************************************************************** + -------- For this to work, the server must support the chunked transfer-encoding. If it doesn't, requests *should* treat this as a normal request and download the entire request contents before passing to pydicom dcmread. - **************************************************************************************** + -------- :returns: a pydicom.dataset.FileDataset representing the DICOM indicated by filepath_or_url """ return _read_dcm_header_pydicom( From ba22671031369a4819fb24ef334cb1623a88b44d Mon Sep 17 00:00:00 2001 From: Bradley Lowekamp Date: Wed, 8 Jun 2022 12:34:53 -0400 Subject: [PATCH 2/3] Move test fixtures to conftest.py Change data_paths to a pyttest fixture. --- conftest.py | 39 ++++++++++++++++++++++++++++++ test/__init__.py | 14 ----------- test/unit/test_is_dicom_xray.py | 9 ++++--- test/unit/test_read_dcm.py | 9 +++---- test/unit/test_read_dcm_headers.py | 28 ++------------------- test/unit/test_resize.py | 3 +-- 6 files changed, 52 insertions(+), 50 deletions(-) create mode 100644 conftest.py diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..bae3bc1 --- /dev/null +++ b/conftest.py @@ -0,0 +1,39 @@ +import pytest +import requests + + +@pytest.fixture(scope="module") +def data_paths(): + """ + A dictionary of test data file names to full paths. + """ + from pathlib import Path + + path_dic = {} + for p in (Path(__file__).parent / "test" / "data").glob("*"): + if p.is_file(): + path_dic[p.name] = str(p.absolute()) + return path_dic + + +@pytest.fixture(scope="session") +def remote_data_paths(): + # Get s3 URL to DICOM files in TBPORTALS + # TODO: try to work around doing this, some of the tests will break if DEPOT updates their DICOMS. + # I couldn't find an easy way to set up a local server to test our data_paths that could implement streaming. + # This could probably be implemented by modifying the python http.server BaseHTTPRequestHandler, but ideally + # there is an easier way than that. + payloads = [ + { + "directory": ( + "00069df2-2406-43b6-8c58-5f5e164c7e35/" + "1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.10/" + "1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.11" + ), + "objectKey": "1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm", + } + ] + presigned_urls = requests.post( + "https://depotapi.tbportals.niaid.nih.gov/api/Amazon/GetBulkPresignedUrls", json=payloads + ).json() + yield {payload["objectKey"]: presigned_urls[i] for i, payload in enumerate(payloads)} diff --git a/test/__init__.py b/test/__init__.py index 5222a43..e69de29 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -1,14 +0,0 @@ -def _data_files(): - """ - A dictionary of test data file names to full paths. - """ - from pathlib import Path - - path_dic = {} - for p in (Path(__file__).parent / "data").glob("*"): - if p.is_file(): - path_dic[p.name] = str(p.absolute()) - return path_dic - - -data_paths = _data_files() diff --git a/test/unit/test_is_dicom_xray.py b/test/unit/test_is_dicom_xray.py index 90e07ee..d0e845f 100644 --- a/test/unit/test_is_dicom_xray.py +++ b/test/unit/test_is_dicom_xray.py @@ -1,7 +1,6 @@ import rap_sitkcore.is_dicom_xray import pytest from pathlib import Path -from .. import data_paths import os import tempfile @@ -23,7 +22,7 @@ ("n12.dcm", False), # "OT" modality ], ) -def test_is_dicom_xray1(test_file, is_xray): +def test_is_dicom_xray1(test_file, is_xray, data_paths): """ """ filename = data_paths[test_file] @@ -60,6 +59,10 @@ def test_is_dicom_xray3(): assert not rap_sitkcore.is_dicom_xray(fpath) +def test_is_dicom_xray4(remote_data_paths): + print(remote_data_paths) + + @pytest.mark.parametrize( ("test_file", "is_xray"), [ @@ -83,7 +86,7 @@ def test_is_dicom_xray3(): ("square_uint8.dcm", True), ], ) -def test_is_dicom_xray4(test_file, is_xray): +def test_is_dicom_xray4(test_file, is_xray, data_paths): """Testing with strict option""" filename = data_paths[test_file] diff --git a/test/unit/test_read_dcm.py b/test/unit/test_read_dcm.py index 7ccec7e..3d9e30d 100644 --- a/test/unit/test_read_dcm.py +++ b/test/unit/test_read_dcm.py @@ -3,7 +3,6 @@ from rap_sitkcore._dicom_util import keyword_to_gdcm_tag import pytest from pathlib import Path -from .. import data_paths import SimpleITK as sitk _white_listed_dicom_tags = [ @@ -40,7 +39,7 @@ "square_uint8.dcm", ], ) -def test_read_dcm1(test_file): +def test_read_dcm1(test_file, data_paths): """ """ filename = data_paths[test_file] @@ -83,7 +82,7 @@ def test_read_dcm2(): ("2.25.298570032897489859462791131067889681111.dcm", (4000, 3000), "9eae70e3f4601c19e88feb63d31fd92e", "CR"), ], ) -def test_read_dcm3(file_name, image_size, image_md5, modality): +def test_read_dcm3(file_name, image_size, image_md5, modality, data_paths): """Test migrated from tbp_image_refresh_thumbnail""" dicom_tag_modality = "0008|0060" @@ -117,7 +116,7 @@ def test_read_dcm3(file_name, image_size, image_md5, modality): ("square_uint8.dcm", 1), ], ) -def test_read_dcm_pydicom(test_file, number_of_components): +def test_read_dcm_pydicom(test_file, number_of_components, data_paths): filename = data_paths[test_file] img = _read_dcm_pydicom(Path(filename)) @@ -148,7 +147,7 @@ def test_read_dcm_pydicom(test_file, number_of_components): ("square_uint8.dcm", False, 1), ], ) -def test_read_dcm_simpleitk(test_file, expected_exception, number_of_components): +def test_read_dcm_simpleitk(test_file, expected_exception, number_of_components, data_paths): filename = data_paths[test_file] if expected_exception: diff --git a/test/unit/test_read_dcm_headers.py b/test/unit/test_read_dcm_headers.py index fab983c..3853c3d 100644 --- a/test/unit/test_read_dcm_headers.py +++ b/test/unit/test_read_dcm_headers.py @@ -2,34 +2,10 @@ import requests import time from rap_sitkcore.read_dcm_headers import _read_dcm_header_pydicom -from .. import data_paths - - -@pytest.fixture(scope="module") -def remote_data_paths(): - # Get s3 URL to DICOM files in TBPORTALS - # TODO: try to work around doing this, some of the tests will break if DEPOT updates their DICOMS. - # I couldn't find an easy way to set up a local server to test our data_paths that could implement streaming. - # This could probably be implemented by modifying the python http.server BaseHTTPRequestHandler, but ideally - # there is an easier way than that. - payloads = [ - { - "directory": ( - "00069df2-2406-43b6-8c58-5f5e164c7e35/" - "1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.10/" - "1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.11" - ), - "objectKey": "1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm", - } - ] - presigned_urls = requests.post( - "https://depotapi.tbportals.niaid.nih.gov/api/Amazon/GetBulkPresignedUrls", json=payloads - ).json() - yield {payload["objectKey"]: presigned_urls[i] for i, payload in enumerate(payloads)} @pytest.mark.parametrize("input_file", ["1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm"]) -def test_read_dcm_headers(input_file, request): +def test_read_dcm_headers(input_file, request, data_paths, remote_data_paths): ##################################################################### # test with local file first ##################################################################### @@ -43,7 +19,7 @@ def test_read_dcm_headers(input_file, request): ##################################################################### # Next test with remote file, without streaming ##################################################################### - presigned_url = request.getfixturevalue("remote_data_paths")[input_file] + presigned_url = remote_data_paths[input_file] st = time.perf_counter() remote_dataset_wo_streaming, fp_wo_streaming, response_wo_streaming = _read_dcm_header_pydicom( diff --git a/test/unit/test_resize.py b/test/unit/test_resize.py index 7256331..664ab7b 100644 --- a/test/unit/test_resize.py +++ b/test/unit/test_resize.py @@ -1,7 +1,6 @@ from rap_sitkcore import read_dcm, resize_and_scale_uint8 import pytest from pathlib import Path -from .. import data_paths import SimpleITK as sitk @@ -13,7 +12,7 @@ ("square_uint8.dcm", (256, 256), "e8358046f74f0977f2e55df97bab0318"), ], ) -def test_resize_and_scale_uint8_1(file_name, thumbnail_size, md5_hash): +def test_resize_and_scale_uint8_1(file_name, thumbnail_size, md5_hash, data_paths): filename = data_paths[file_name] img = read_dcm(Path(filename)) From 350a8d56c0ad319e9d0325a55eb14d93963dfeeb Mon Sep 17 00:00:00 2001 From: Bradley Lowekamp Date: Wed, 8 Jun 2022 13:57:51 -0400 Subject: [PATCH 3/3] Update is_dicom_xray to suport URLs Use read_dcm_header_pydicom to add support for URLs and filepaths. Add kwargs to read_dcm_header_pydicom to forward arguments to read dcm. --- rap_sitkcore/is_dicom_xray.py | 12 +++++---- rap_sitkcore/read_dcm_headers.py | 42 +++++++++++++++++------------- test/unit/test_is_dicom_xray.py | 14 +++++++--- test/unit/test_read_dcm_headers.py | 1 - 4 files changed, 41 insertions(+), 28 deletions(-) diff --git a/rap_sitkcore/is_dicom_xray.py b/rap_sitkcore/is_dicom_xray.py index af76259..5cd223e 100644 --- a/rap_sitkcore/is_dicom_xray.py +++ b/rap_sitkcore/is_dicom_xray.py @@ -1,6 +1,7 @@ -import pydicom from pydicom.errors import InvalidDicomError from pathlib import Path +from rap_sitkcore.read_dcm_headers import read_dcm_header_pydicom +from typing import Union import logging @@ -14,7 +15,7 @@ ) -def is_dicom_xray(filename: Path, strict: bool = False) -> bool: +def is_dicom_xray(filepath_or_url: Union[str, Path], strict: bool = False) -> bool: """ For a DICOM file, inspects the Modality meta-data field to determine if the image is an x-ray. This classification is for the TBPortals collection to primarily x-ray from CT scans. @@ -22,14 +23,15 @@ def is_dicom_xray(filename: Path, strict: bool = False) -> bool: If the filename is not in DICOM format, then false is returned. If the filename does not refer to a file, then the "FileNotFound" exception will be thrown. - :param filename: a Path to a DICOM file to inspect + :param filepath_or_url: A string containing either a path or a URL. The URL must be prefixed by either + 'http://' or 'https://'. Or a filepath as a pathlib Path object. :param strict: If true scanned? picture? modalities are not considered. :return: True if the DICOM Modality may be an x-ray image. """ try: - with pydicom.dcmread(filename, specific_tags=["Modality"]) as ds: - _logger.debug(f'"{str(filename)}" has "{ds.Modality}" modality.') + with read_dcm_header_pydicom(filepath_or_url, specific_tags=["Modality"]) as ds: + _logger.debug(f'"{str(filepath_or_url)}" has "{ds.Modality}" modality.') if strict: return ds.Modality in _strict_modalities return ds.Modality in _acceptable_modalities diff --git a/rap_sitkcore/read_dcm_headers.py b/rap_sitkcore/read_dcm_headers.py index 3551e7f..bab64d5 100644 --- a/rap_sitkcore/read_dcm_headers.py +++ b/rap_sitkcore/read_dcm_headers.py @@ -1,8 +1,9 @@ """ Functions related to retrieving the dicom headers from a file """ import pydicom import requests -import pathlib +from pathlib import Path from io import SEEK_SET, SEEK_END +from typing import Any, Union class _ResponseStream(object): @@ -96,13 +97,13 @@ def seek(self, position, whence=SEEK_SET): def _read_dcm_header_pydicom( - filepath_or_url: str, download_chunk_size_bytes: int = 10240, stream: bool = True + filepath_or_url: Union[str, Path], download_chunk_size_bytes: int = 10240, stream: bool = True, **kwargs: Any ) -> tuple: """ Get the DICOM headers from either a local file (by file path) or a remote file (by URL). :param filepath_or_url: A string containing either a path or a URL. The URL must be prefixed by either - 'http://' or 'https://'. + 'http://' or 'https://'. Or a filepath as a pathlib Path object. :param download_chunk_size_bytes: An integer controlling the download chunk size if the file is remote, defaults to 1024 bytes. :param stream: A boolean indicating whether the HTTP request (if filepath_or_url points to a remote file) @@ -114,37 +115,41 @@ def _read_dcm_header_pydicom( *should* treat this as a normal request and download the entire request contents before passing to pydicom dcmread. -------- + :param kwargs: keywords are forwarded to pydicom.dcmread :returns: tuple of the following format: ( pydicom.dataset.FileDataset: the pydicom file dataset object, 'fp': the object that pydicom reads from - a ResponseStream for remote files or a path for local ones, - 'response': the raw requests responsee + 'response': the raw requests response ) """ - # Try to see if filepath_or_url is a valid url - try: - # Make request; could/should make this a session for repeated requests. - response = requests.get(filepath_or_url, stream=stream) - except requests.exceptions.MissingSchema: - response = None - # There wasn't an http or https, so treat this as a local file - fp = pathlib.Path(filepath_or_url) + response = None + if isinstance(filepath_or_url, Path): + fp = filepath_or_url else: - # create a seekable interface into requests content iterator - fp = _ResponseStream(response.iter_content(download_chunk_size_bytes)) + # Try to see if filepath_or_url is a valid url + try: + # Make request; could/should make this a session for repeated requests. + response = requests.get(filepath_or_url, stream=stream) + except requests.exceptions.MissingSchema: + # There wasn't an http or https, so treat this as a local file + fp = Path(filepath_or_url) + else: + # create a seekable interface into requests content iterator + fp = _ResponseStream(response.iter_content(download_chunk_size_bytes)) # Return the object that pydicom reads from and the raw requests response (for testing purposes). - return (pydicom.dcmread(fp=fp, stop_before_pixels=True), fp, response) + return (pydicom.dcmread(fp=fp, stop_before_pixels=True, **kwargs), fp, response) def read_dcm_header_pydicom( - filepath_or_url: str, download_chunk_size_bytes: int = 10240, stream: bool = True + filepath_or_url: Union[str, Path], download_chunk_size_bytes: int = 10240, stream: bool = True, **kwargs: Any ) -> pydicom.dataset.FileDataset: """ Get the DICOM headers from either a local file (by file path) or a remote file (by URL). :param filepath_or_url: A string containing either a path or a URL. The URL must be prefixed by either - 'http://' or 'https://'. + 'http://' or 'https://'. Or a filepath as a pathlib Path object. :param download_chunk_size_bytes: An integer controlling the download chunk size if the file is remote, defaults to 1024 bytes. :param stream: A boolean indicating whether the HTTP request (if filepath_or_url points to a remote file) @@ -156,8 +161,9 @@ def read_dcm_header_pydicom( *should* treat this as a normal request and download the entire request contents before passing to pydicom dcmread. -------- + :param kwargs: keywords are forwarded to pydicom.dcmread :returns: a pydicom.dataset.FileDataset representing the DICOM indicated by filepath_or_url """ return _read_dcm_header_pydicom( - filepath_or_url=filepath_or_url, download_chunk_size_bytes=download_chunk_size_bytes, stream=stream + filepath_or_url=filepath_or_url, download_chunk_size_bytes=download_chunk_size_bytes, stream=stream, **kwargs )[0] diff --git a/test/unit/test_is_dicom_xray.py b/test/unit/test_is_dicom_xray.py index d0e845f..5b4e6f9 100644 --- a/test/unit/test_is_dicom_xray.py +++ b/test/unit/test_is_dicom_xray.py @@ -28,6 +28,7 @@ def test_is_dicom_xray1(test_file, is_xray, data_paths): filename = data_paths[test_file] assert rap_sitkcore.is_dicom_xray(Path(filename)) == is_xray + assert rap_sitkcore.is_dicom_xray(str(filename)) == is_xray def test_is_dicom_xray2(): @@ -59,10 +60,6 @@ def test_is_dicom_xray3(): assert not rap_sitkcore.is_dicom_xray(fpath) -def test_is_dicom_xray4(remote_data_paths): - print(remote_data_paths) - - @pytest.mark.parametrize( ("test_file", "is_xray"), [ @@ -92,3 +89,12 @@ def test_is_dicom_xray4(test_file, is_xray, data_paths): filename = data_paths[test_file] assert rap_sitkcore.is_dicom_xray(Path(filename), strict=True) == is_xray + + +@pytest.mark.parametrize( + ("test_file", "is_xray"), [("1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm", True)] +) +def test_is_dicom_xray5(test_file, is_xray, remote_data_paths): + + url = remote_data_paths[test_file] + assert rap_sitkcore.is_dicom_xray(url, strict=True) == is_xray diff --git a/test/unit/test_read_dcm_headers.py b/test/unit/test_read_dcm_headers.py index 3853c3d..2f2bed1 100644 --- a/test/unit/test_read_dcm_headers.py +++ b/test/unit/test_read_dcm_headers.py @@ -1,5 +1,4 @@ import pytest -import requests import time from rap_sitkcore.read_dcm_headers import _read_dcm_header_pydicom