Skip to content

Commit

Permalink
Merge pull request #26 from niaid/is_xray_update
Browse files Browse the repository at this point in the history
is_xray update to support URLs
  • Loading branch information
blowekamp authored Jun 9, 2022
2 parents 0197618 + 350a8d5 commit a13f975
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 78 deletions.
39 changes: 39 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import pytest
import requests


@pytest.fixture(scope="module")
def data_paths():
"""
A dictionary of test data file names to full paths.
"""
from pathlib import Path

path_dic = {}
for p in (Path(__file__).parent / "test" / "data").glob("*"):
if p.is_file():
path_dic[p.name] = str(p.absolute())
return path_dic


@pytest.fixture(scope="session")
def remote_data_paths():
# Get s3 URL to DICOM files in TBPORTALS
# TODO: try to work around doing this, some of the tests will break if DEPOT updates their DICOMS.
# I couldn't find an easy way to set up a local server to test our data_paths that could implement streaming.
# This could probably be implemented by modifying the python http.server BaseHTTPRequestHandler, but ideally
# there is an easier way than that.
payloads = [
{
"directory": (
"00069df2-2406-43b6-8c58-5f5e164c7e35/"
"1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.10/"
"1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.11"
),
"objectKey": "1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm",
}
]
presigned_urls = requests.post(
"https://depotapi.tbportals.niaid.nih.gov/api/Amazon/GetBulkPresignedUrls", json=payloads
).json()
yield {payload["objectKey"]: presigned_urls[i] for i, payload in enumerate(payloads)}
12 changes: 7 additions & 5 deletions rap_sitkcore/is_dicom_xray.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pydicom
from pydicom.errors import InvalidDicomError
from pathlib import Path
from rap_sitkcore.read_dcm_headers import read_dcm_header_pydicom
from typing import Union


import logging
Expand All @@ -14,22 +15,23 @@
)


def is_dicom_xray(filename: Path, strict: bool = False) -> bool:
def is_dicom_xray(filepath_or_url: Union[str, Path], strict: bool = False) -> bool:
"""
For a DICOM file, inspects the Modality meta-data field to determine if the image is an x-ray. This classification
is for the TBPortals collection to primarily x-ray from CT scans.
If the filename is not in DICOM format, then false is returned.
If the filename does not refer to a file, then the "FileNotFound" exception will be thrown.
:param filename: a Path to a DICOM file to inspect
:param filepath_or_url: A string containing either a path or a URL. The URL must be prefixed by either
'http://' or 'https://'. Or a filepath as a pathlib Path object.
:param strict: If true scanned? picture? modalities are not considered.
:return: True if the DICOM Modality may be an x-ray image.
"""

try:
with pydicom.dcmread(filename, specific_tags=["Modality"]) as ds:
_logger.debug(f'"{str(filename)}" has "{ds.Modality}" modality.')
with read_dcm_header_pydicom(filepath_or_url, specific_tags=["Modality"]) as ds:
_logger.debug(f'"{str(filepath_or_url)}" has "{ds.Modality}" modality.')
if strict:
return ds.Modality in _strict_modalities
return ds.Modality in _acceptable_modalities
Expand Down
50 changes: 28 additions & 22 deletions rap_sitkcore/read_dcm_headers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
""" Functions related to retrieving the dicom headers from a file """
import pydicom
import requests
import pathlib
from pathlib import Path
from io import SEEK_SET, SEEK_END
from typing import Any, Union


class _ResponseStream(object):
Expand Down Expand Up @@ -96,68 +97,73 @@ def seek(self, position, whence=SEEK_SET):


def _read_dcm_header_pydicom(
filepath_or_url: str, download_chunk_size_bytes: int = 10240, stream: bool = True
filepath_or_url: Union[str, Path], download_chunk_size_bytes: int = 10240, stream: bool = True, **kwargs: Any
) -> tuple:
"""
Get the DICOM headers from either a local file (by file path) or a remote file (by URL).
:param filepath_or_url: A string containing either a path or a URL. The URL must be prefixed by either
'http://' or 'https://'.
'http://' or 'https://'. Or a filepath as a pathlib Path object.
:param download_chunk_size_bytes: An integer controlling the download chunk size if the file is remote,
defaults to 1024 bytes.
:param stream: A boolean indicating whether the HTTP request (if filepath_or_url points to a remote file)
should be streamed.
If filepath_or_url points to a local file, this parameter is ignored.
****************************************************************************************
-------
For this to work, the server must support the chunked transfer-encoding. If it doesn't, requests
*should* treat this as a normal request and download the entire request contents before passing to
pydicom dcmread.
****************************************************************************************
--------
:param kwargs: keywords are forwarded to pydicom.dcmread
:returns: tuple of the following format:
(
pydicom.dataset.FileDataset: the pydicom file dataset object,
'fp': the object that pydicom reads from - a ResponseStream for remote files or a path for
local ones,
'response': the raw requests responsee
'response': the raw requests response
)
"""
# Try to see if filepath_or_url is a valid url
try:
# Make request; could/should make this a session for repeated requests.
response = requests.get(filepath_or_url, stream=stream)
except requests.exceptions.MissingSchema:
response = None
# There wasn't an http or https, so treat this as a local file
fp = pathlib.Path(filepath_or_url)
response = None
if isinstance(filepath_or_url, Path):
fp = filepath_or_url
else:
# create a seekable interface into requests content iterator
fp = _ResponseStream(response.iter_content(download_chunk_size_bytes))
# Try to see if filepath_or_url is a valid url
try:
# Make request; could/should make this a session for repeated requests.
response = requests.get(filepath_or_url, stream=stream)
except requests.exceptions.MissingSchema:
# There wasn't an http or https, so treat this as a local file
fp = Path(filepath_or_url)
else:
# create a seekable interface into requests content iterator
fp = _ResponseStream(response.iter_content(download_chunk_size_bytes))
# Return the object that pydicom reads from and the raw requests response (for testing purposes).
return (pydicom.dcmread(fp=fp, stop_before_pixels=True), fp, response)
return (pydicom.dcmread(fp=fp, stop_before_pixels=True, **kwargs), fp, response)


def read_dcm_header_pydicom(
filepath_or_url: str, download_chunk_size_bytes: int = 10240, stream: bool = True
filepath_or_url: Union[str, Path], download_chunk_size_bytes: int = 10240, stream: bool = True, **kwargs: Any
) -> pydicom.dataset.FileDataset:
"""
Get the DICOM headers from either a local file (by file path) or a remote file (by URL).
:param filepath_or_url: A string containing either a path or a URL. The URL must be prefixed by either
'http://' or 'https://'.
'http://' or 'https://'. Or a filepath as a pathlib Path object.
:param download_chunk_size_bytes: An integer controlling the download chunk size if the file is remote,
defaults to 1024 bytes.
:param stream: A boolean indicating whether the HTTP request (if filepath_or_url points to a remote file)
should be streamed.
If filepath_or_url points to a local file, this parameter is ignored.
****************************************************************************************
--------
For this to work, the server must support the chunked transfer-encoding. If it doesn't, requests
*should* treat this as a normal request and download the entire request contents before passing to
pydicom dcmread.
****************************************************************************************
--------
:param kwargs: keywords are forwarded to pydicom.dcmread
:returns: a pydicom.dataset.FileDataset representing the DICOM indicated by filepath_or_url
"""
return _read_dcm_header_pydicom(
filepath_or_url=filepath_or_url, download_chunk_size_bytes=download_chunk_size_bytes, stream=stream
filepath_or_url=filepath_or_url, download_chunk_size_bytes=download_chunk_size_bytes, stream=stream, **kwargs
)[0]
14 changes: 0 additions & 14 deletions test/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +0,0 @@
def _data_files():
"""
A dictionary of test data file names to full paths.
"""
from pathlib import Path

path_dic = {}
for p in (Path(__file__).parent / "data").glob("*"):
if p.is_file():
path_dic[p.name] = str(p.absolute())
return path_dic


data_paths = _data_files()
15 changes: 12 additions & 3 deletions test/unit/test_is_dicom_xray.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import rap_sitkcore.is_dicom_xray
import pytest
from pathlib import Path
from .. import data_paths
import os
import tempfile

Expand All @@ -23,12 +22,13 @@
("n12.dcm", False), # "OT" modality
],
)
def test_is_dicom_xray1(test_file, is_xray):
def test_is_dicom_xray1(test_file, is_xray, data_paths):
""" """

filename = data_paths[test_file]

assert rap_sitkcore.is_dicom_xray(Path(filename)) == is_xray
assert rap_sitkcore.is_dicom_xray(str(filename)) == is_xray


def test_is_dicom_xray2():
Expand Down Expand Up @@ -83,9 +83,18 @@ def test_is_dicom_xray3():
("square_uint8.dcm", True),
],
)
def test_is_dicom_xray4(test_file, is_xray):
def test_is_dicom_xray4(test_file, is_xray, data_paths):
"""Testing with strict option"""

filename = data_paths[test_file]

assert rap_sitkcore.is_dicom_xray(Path(filename), strict=True) == is_xray


@pytest.mark.parametrize(
("test_file", "is_xray"), [("1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm", True)]
)
def test_is_dicom_xray5(test_file, is_xray, remote_data_paths):

url = remote_data_paths[test_file]
assert rap_sitkcore.is_dicom_xray(url, strict=True) == is_xray
9 changes: 4 additions & 5 deletions test/unit/test_read_dcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from rap_sitkcore._dicom_util import keyword_to_gdcm_tag
import pytest
from pathlib import Path
from .. import data_paths
import SimpleITK as sitk

_white_listed_dicom_tags = [
Expand Down Expand Up @@ -40,7 +39,7 @@
"square_uint8.dcm",
],
)
def test_read_dcm1(test_file):
def test_read_dcm1(test_file, data_paths):
""" """

filename = data_paths[test_file]
Expand Down Expand Up @@ -83,7 +82,7 @@ def test_read_dcm2():
("2.25.298570032897489859462791131067889681111.dcm", (4000, 3000), "9eae70e3f4601c19e88feb63d31fd92e", "CR"),
],
)
def test_read_dcm3(file_name, image_size, image_md5, modality):
def test_read_dcm3(file_name, image_size, image_md5, modality, data_paths):
"""Test migrated from tbp_image_refresh_thumbnail"""
dicom_tag_modality = "0008|0060"

Expand Down Expand Up @@ -117,7 +116,7 @@ def test_read_dcm3(file_name, image_size, image_md5, modality):
("square_uint8.dcm", 1),
],
)
def test_read_dcm_pydicom(test_file, number_of_components):
def test_read_dcm_pydicom(test_file, number_of_components, data_paths):
filename = data_paths[test_file]

img = _read_dcm_pydicom(Path(filename))
Expand Down Expand Up @@ -148,7 +147,7 @@ def test_read_dcm_pydicom(test_file, number_of_components):
("square_uint8.dcm", False, 1),
],
)
def test_read_dcm_simpleitk(test_file, expected_exception, number_of_components):
def test_read_dcm_simpleitk(test_file, expected_exception, number_of_components, data_paths):
filename = data_paths[test_file]

if expected_exception:
Expand Down
29 changes: 2 additions & 27 deletions test/unit/test_read_dcm_headers.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,10 @@
import pytest
import requests
import time
from rap_sitkcore.read_dcm_headers import _read_dcm_header_pydicom
from .. import data_paths


@pytest.fixture(scope="module")
def remote_data_paths():
# Get s3 URL to DICOM files in TBPORTALS
# TODO: try to work around doing this, some of the tests will break if DEPOT updates their DICOMS.
# I couldn't find an easy way to set up a local server to test our data_paths that could implement streaming.
# This could probably be implemented by modifying the python http.server BaseHTTPRequestHandler, but ideally
# there is an easier way than that.
payloads = [
{
"directory": (
"00069df2-2406-43b6-8c58-5f5e164c7e35/"
"1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.10/"
"1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.11"
),
"objectKey": "1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm",
}
]
presigned_urls = requests.post(
"https://depotapi.tbportals.niaid.nih.gov/api/Amazon/GetBulkPresignedUrls", json=payloads
).json()
yield {payload["objectKey"]: presigned_urls[i] for i, payload in enumerate(payloads)}


@pytest.mark.parametrize("input_file", ["1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm"])
def test_read_dcm_headers(input_file, request):
def test_read_dcm_headers(input_file, request, data_paths, remote_data_paths):
#####################################################################
# test with local file first
#####################################################################
Expand All @@ -43,7 +18,7 @@ def test_read_dcm_headers(input_file, request):
#####################################################################
# Next test with remote file, without streaming
#####################################################################
presigned_url = request.getfixturevalue("remote_data_paths")[input_file]
presigned_url = remote_data_paths[input_file]

st = time.perf_counter()
remote_dataset_wo_streaming, fp_wo_streaming, response_wo_streaming = _read_dcm_header_pydicom(
Expand Down
3 changes: 1 addition & 2 deletions test/unit/test_resize.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from rap_sitkcore import read_dcm, resize_and_scale_uint8
import pytest
from pathlib import Path
from .. import data_paths
import SimpleITK as sitk


Expand All @@ -13,7 +12,7 @@
("square_uint8.dcm", (256, 256), "e8358046f74f0977f2e55df97bab0318"),
],
)
def test_resize_and_scale_uint8_1(file_name, thumbnail_size, md5_hash):
def test_resize_and_scale_uint8_1(file_name, thumbnail_size, md5_hash, data_paths):

filename = data_paths[file_name]
img = read_dcm(Path(filename))
Expand Down

0 comments on commit a13f975

Please sign in to comment.