-
Notifications
You must be signed in to change notification settings - Fork 3
/
make_dataset.py
executable file
·93 lines (77 loc) · 3.73 KB
/
make_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# -*- coding: utf-8 -*-
import glob
import os
from src.data.Dataset import ensure_dir, create_2d_slices_from_3d_volume_files
# Download and unpack the raw data
# small helper
def clean_import(dir_path):
import shutil
try:
shutil.rmtree(dir_path)
except OSError as e:
print("Error: %s : %s" % (dir_path, e.strerror))
print('Dont worry, irectory will be created.')
ensure_dir(dir_path)
def main(data_root, path_to_acdc_original):
# ------------------------------------------define logging and working directory
from ProjectRoot import change_wd_to_project_root
change_wd_to_project_root()
# define a folder for the acdc cmr and masks, make sure not to use an existing folder
import_path = os.path.join(data_root, 'import')
ensure_dir(data_root)
clean_import(import_path)
# download cleaned rvip 3D cmr and masks
url = 'https://heibox.uni-heidelberg.de/f/8776d7311ec84723aacf/?dl=1 '
os.system('wget {} -P {}'.format(url, import_path))
print('downloaded')
# unzip and replace
zip_file = glob.glob(os.path.join(import_path, 'index.html?dl=*'))[0]
os.system('unzip -o {} -d {}'.format(zip_file, data_root))
# clean temp import older
clean_import(import_path)
# remove old and download new cv-dataframe
os.system('rm {} -f'.format(os.path.join(data_root, 'df_kfold.csv')))
url2 = 'https://heibox.uni-heidelberg.de/f/03f57e89dc8b46668144/?dl=1'
os.system('wget {} -P {}'.format(url2, import_path))
print('downloaded')
# unzip and replace
zip_file = glob.glob(os.path.join(import_path, 'index.html?dl=*'))[0]
os.system('unzip -o {} -d {}'.format(zip_file, data_root))
# clean temp import folder
clean_import(import_path)
# io == interobserver
# pp == 100 patients x phases xrvip/cmr = 400 files
print('collect 3D CMR from: {}'.format(path_to_acdc_original))
# searches in all patient folders for any 3D CMR (2 frames per patient) as nifti
images = sorted(glob.glob(os.path.join(path_to_acdc_original, '*/*frame[0-9][0-9].nii.gz')))
print('images: {}'.format(len(images)))
# quality check of the image and mask names, find names with wrong names
# give input and output path here
input_path = os.path.join(data_root, 'pp')
export_path = os.path.join(data_root, '2D')
# images = sorted(glob.glob(os.path.join(input_path, '*frame[0-9][0-9].nii')))
masks = sorted(glob.glob(os.path.join(input_path,
'*frame[0-9][0-9]_rvip.nrrd'))) # searches in all first level folders for any mask as nrrd
print('images: {}'.format(len(images)))
print('masks: {}'.format(len(masks)))
assert (len(images) == len(masks)), 'len(images)-> {} != len(masks)-> {} '.format(len(images), len(masks))
# in the optimal case there should be as many images as masks. If not, some of the annotations might have been saved with a wrong name.
# Slice the 3D vol in 2D slices
ensure_dir(export_path)
_ = [create_2d_slices_from_3d_volume_files(img_f=img, mask_f=msk, export_path=export_path) for img, msk in
zip(images, masks)]
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description='train a RV IP detection/segmentation model on CMR images')
# usually these two parameters should encapsulate all experiment parameters
parser.add_argument('-data_root', action='store', default='data/import')
parser.add_argument('-acdc_data', action='store', default='data/import/original')
results = parser.parse_args()
print('given parameters: {}'.format(results))
data_root = results.data_root
acdc_data = results.acdc_data
try:
main(data_root, acdc_data)
except Exception as e:
print(e)
exit()