Commit
Merge pull request #79 from Emory-HITI/dev
Skip when EMPI is not found for a given Accession.
pradeeban authored Dec 10, 2020
2 parents 1506dc8 + aa143ad commit 77c9baf
Showing 3 changed files with 27 additions and 22 deletions.
38 changes: 21 additions & 17 deletions modules/cold-extraction/ColdDataRetriever.py
@@ -194,24 +194,28 @@ def retrieve():
         Accession = accessions[pid]
         subprocess.call("{0}/findscu -c {1} -b {2} -m AccessionNumber={3} -r PatientID -r StudyInstanceUID -x stid.csv.xsl --out-cat --out-file intermediate.csv --out-dir .".format(DCM4CHE_BIN, SRC_AET, QUERY_AET, Accession), shell=True)
 
-        #Processing the Intermediate CSV file with EMPI and StudyIDs
-        with open('intermediate1.csv', newline='') as g: #DCM4CHE appends 1.
-            reader2 = csv.reader(g)
-            # Array of studies
-            patients2 = []
-            studies2 = []
-            for row2 in reader2:
-                patients2.append(row2[1])
-                studies2.append(row2[0])
+        try:
+            #Processing the Intermediate CSV file with EMPI and StudyIDs
+            with open('intermediate1.csv', newline='') as g: #DCM4CHE appends 1.
+                reader2 = csv.reader(g)
+                # Array of studies
+                patients2 = []
+                studies2 = []
+                for row2 in reader2:
+                    patients2.append(row2[1])
+                    studies2.append(row2[0])
 
-        # Create our Identifier (query) dataset
-        for pid2 in range(0, len(patients2)):
-            Study = studies2[pid2]
-            Patient = patients2[pid2]
-            temp_id = Patient + SEPARATOR + Study
-            if ((not resume) or (resume and (temp_id not in extracted_ones))):
-                subprocess.call("{0}/movescu -c {1} -b {2} -M PatientRoot -m PatientID={3} -m StudyInstanceUID={4} --dest {5}".format(DCM4CHE_BIN, SRC_AET, QUERY_AET, Patient, Study, DEST_AET), shell=True)
-                extracted_ones.append(temp_id)
+            # Create our Identifier (query) dataset
+            for pid2 in range(0, len(patients2)):
+                Study = studies2[pid2]
+                Patient = patients2[pid2]
+                temp_id = Patient + SEPARATOR + Study
+                if ((not resume) or (resume and (temp_id not in extracted_ones))):
+                    subprocess.call("{0}/movescu -c {1} -b {2} -M PatientRoot -m PatientID={3} -m StudyInstanceUID={4} --dest {5}".format(DCM4CHE_BIN, SRC_AET, QUERY_AET, Patient, Study, DEST_AET), shell=True)
+                    extracted_ones.append(temp_id)
+
+        except IOError:
+            logging.info("No EMPI, StudyInstanceUID found for the current entry. Skipping this line, and moving to the next")
 
 
         # Kill the running storescp process of QbNiffler.
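The fix in this hunk can be sketched as a standalone pair of helpers. The function names, `SEPARATOR` value, and file paths here are hypothetical placeholders; only the `try`/`except IOError` skip and the resume guard mirror the committed change:

```python
import csv
import logging

SEPARATOR = '@'  # assumption: the real separator is taken from Niffler's configuration

def read_query_pairs(path):
    """Read the findscu output CSV of (StudyInstanceUID, PatientID) rows.

    When findscu finds no EMPI for the Accession, the CSV is never written,
    so open() raises IOError; we log and return an empty list so the batch
    moves on to the next Accession instead of crashing.
    """
    pairs = []
    try:
        with open(path, newline='') as g:
            for row in csv.reader(g):
                pairs.append((row[1], row[0]))  # (PatientID, StudyInstanceUID)
    except IOError:
        logging.info("No EMPI, StudyInstanceUID found for the current entry. "
                     "Skipping this line, and moving to the next")
    return pairs

def needs_move(patient, study, resume, extracted_ones):
    """Resume guard: only issue a C-MOVE for pairs not pulled in a prior run."""
    temp_id = patient + SEPARATOR + study
    return (not resume) or (temp_id not in extracted_ones)
```

In Python 3, `IOError` is an alias of `OSError`, so the missing-file case (`FileNotFoundError`) is caught by the same handler the commit adds.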
2 changes: 1 addition & 1 deletion modules/cold-extraction/README.md
@@ -99,7 +99,7 @@ config.json entries are to be set *for each* Niffler on-demand DICOM extractions
 
 * *NifflerSystem*: By default, system.json. Provide a custom json file with Niffler system information, if you have any.
 
-* *StorageFolder*: Create a folder where you like your DICOM files to be. Usually, this is an empty folder (since each extraction is unique). Make sure the python program has write access to that folder.
+* *StorageFolder*: Create a folder where you like your DICOM files to be. Usually, this is an empty folder (since each extraction is unique). Make sure you, i.e., the user that starts Niffler ColdDataRetriever.py, have write access to that folder.
 
 * *FilePath*: By default, "{00100020}/{0020000D}/{0020000E}/{00080018}.dcm". This indicates a hierarchical storage of patients/studies/series/instances.dcm. Leave this value as it is unless you want to change the hierarchy.
 
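Taken together, the entries discussed in this hunk might look as follows in config.json. The StorageFolder path is a hypothetical placeholder; the FilePath value is the documented default quoted above, and this sketch omits the other required entries:

```json
{
  "NifflerSystem": "system.json",
  "StorageFolder": "/opt/data/new-extraction",
  "FilePath": "{00100020}/{0020000D}/{0020000E}/{00080018}.dcm"
}
```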
9 changes: 5 additions & 4 deletions modules/png-extraction/README.md
@@ -29,10 +29,6 @@ The below two fields can be left unmodified for most executions. The default val
 
     $ nohup python3 ImageExtractor.py > UNIQUE-OUTPUT-FILE-FOR-YOUR-EXTRACTION.out &
 
-There is also an experimental PNG extractor implementation that provides a distributed execution based on Slurm.
-
-    $ nohup python3 ImageExtractorSlurm.py > UNIQUE-OUTPUT-FILE-FOR-YOUR-EXTRACTION.out &
-
 Check that the extraction is going smooth with no errors, by,
 
     $ tail -f UNIQUE-OUTPUT-FILE-FOR-YOUR-EXTRACTION.out
@@ -51,3 +47,8 @@ In the OutputDirectory, there will be several sub folders and directories.
 * *extracted-images*: The folder that consists of extracted PNG images
 
 * *failed-dicom*: The folder that consists of the DICOM images that failed to produce the PNG images upon the execution of the Niffler PNG Extractor. Failed DICOM images are stored in sub-folders named 1, 2, 3, and 4, categorized according to their failure reason.
+
+
+## Running the Niffler PNG Extractor with Slurm
+
+There is also an experimental PNG extractor implementation (ImageExtractorSlurm.py) that provides a distributed execution based on Slurm on a cluster.
