-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathradiomics_extractor.py
executable file
·139 lines (120 loc) · 5.54 KB
/
radiomics_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from radiomics import featureextractor, logging as radiomics_logging
import SimpleITK as sitk
import ast
from pathlib import Path
import cv2
import numpy as np
import pandas as pd
import os
import re
import xmltodict
from pydicom import dcmread
# ** DEPRECATION WARNING **
# This script uses PyRadiomics, a python module that as of this date cannot be installed on latest Python.
# If you want to use this script you will either have to use older python version (~3.10), OR
# Use the docker image radiomics/pyradiomics:latest and just install opencv-python, pydicom, and xmltodict on the docker
# Low high mode
low_high_mode = True
chosen_classes = ['mass']
# INBreast Dataset paths
inbreast_path = os.path.join('datasets', 'INbreast Release 1.0')
inbreast_xml_dir = os.path.join(inbreast_path, 'AllXML')
inbreast_dcm_dir = os.path.join(inbreast_path, 'AllDICOMs')
inbreast_csv = os.path.join(inbreast_path, 'INbreast.csv')
output_csv = os.path.join(inbreast_path, 'radiomics_features.csv')
# Initialize PyRadiomics feature extractor
extractor = featureextractor.RadiomicsFeatureExtractor()
extractor.enableAllFeatures() # Enable all features initially
extractor.enabledFeatures['shape'] = [] # Disable 3D shape features
extractor.enabledFeatures['shape2D'] = [] # Enable 2D shape features
# Reduce verbosity of PyRadiomics logging
radiomics_logging.getLogger('radiomics').setLevel(radiomics_logging.ERROR)
def main():
# Initialize variables
all_classes = []
if 'mass' in chosen_classes:
if low_high_mode:
all_classes.extend(['mass_low', 'mass_high'])
else:
all_classes.append('mass')
if 'calcification' in chosen_classes:
all_classes.append('calcification')
inbreast_classes = set()
inbreast_xmls = [str(x) for x in list(Path(inbreast_xml_dir).glob('*.xml'))] # Load XML paths
# Read CSV for malignant/benign
inbreast_csv_data = pd.read_csv(inbreast_csv, sep=';')
file_score_pairs = {}
for filename, score in zip(inbreast_csv_data['File Name'], inbreast_csv_data['Bi-Rads']):
filename = str(filename)
score = int(re.sub(r'\D', '', score)) # 4a, 4b, 4c => 4
file_score_pairs[filename] = score
feature_results = []
for inbreast_xml in inbreast_xmls:
# Read XML to Dict
with open(inbreast_xml) as f:
xml_data = f.read()
xml_dict = xmltodict.parse(xml_data)
entries = xml_dict['plist']['dict']['array']['dict']['array']['dict']
if not isinstance(entries, list):
entries = [entries]
# Prepare rois dict/list
rois = {cls: [] for cls in chosen_classes}
for entry in entries:
class_name = entry['string'][1]
if class_name:
class_name = class_name.lower()
inbreast_classes.add(class_name)
if class_name in chosen_classes:
roi = entry['array'][1]['string']
if isinstance(roi, list):
roi = [ast.literal_eval(point) for point in roi]
elif isinstance(roi, str):
roi = ast.literal_eval(roi)
if len(roi) > 2:
roi = np.array(roi, dtype=np.int32)
rois[class_name].append(roi)
if any(rois.values()):
dcm_prefix = Path(inbreast_xml).stem
patient_dir = None
for filename in os.listdir(inbreast_dcm_dir):
if re.match(dcm_prefix + '.*\.dcm', filename):
patient_dir = os.path.join(inbreast_dcm_dir, filename)
break
if not patient_dir:
raise Exception('The following DICOM file was not found in the directory: ' + dcm_prefix)
cls_suffix = ''
if low_high_mode:
if dcm_prefix in file_score_pairs:
score = file_score_pairs[dcm_prefix]
cls_suffix = '_low' if score <= 3 else '_high'
pixel_array = dcmread(patient_dir).pixel_array
pixel_array = normalize_with_threshold(pixel_array) * 255
pixel_array = np.uint8(pixel_array)
for cls in chosen_classes:
for i, roi in enumerate(rois[cls]):
# Create mask for the ROI with label 1
roi_mask = np.zeros_like(pixel_array, dtype=np.uint8)
roi_mask = cv2.fillPoly(roi_mask, [roi], 1)
# Convert to SimpleITK images
sitk_image = sitk.GetImageFromArray(pixel_array)
sitk_mask = sitk.GetImageFromArray(roi_mask)
# Set the same spacing for image and mask (if known, otherwise defaults are used)
sitk_image.SetSpacing((1.0, 1.0))
sitk_mask.SetSpacing((1.0, 1.0))
# Extract radiomics features
result = extractor.execute(sitk_image, sitk_mask)
result['class'] = cls + cls_suffix
result['roi_index'] = i
result['dcm_prefix'] = dcm_prefix
feature_results.append(result)
# Save features to CSV
pd.DataFrame(feature_results).to_csv(output_csv, index=False)
def normalize(pixel_array):
pixel_range = pixel_array.max() - pixel_array.min()
return (pixel_array - pixel_array.min()) / pixel_range
def normalize_with_threshold(pixel_array, threshold=(1000, 2000)):
pixel_array[pixel_array < threshold[0]] = threshold[0]
pixel_array[pixel_array > threshold[1]] = threshold[1]
return normalize(pixel_array)
if __name__ == '__main__':
main()