From 6c0742e2ce7bd5ca824b3c8cdc663005745243e9 Mon Sep 17 00:00:00 2001
From: vsoch
Date: Mon, 30 Oct 2017 16:30:29 -0700
Subject: [PATCH] changes

---
 sendit/apps/main/tasks/finish.py | 26 +++++++++++++++++++++-----
 sendit/apps/main/tasks/get.py    | 27 +++++++++++++++++++++++++--
 sendit/apps/main/tasks/update.py |  2 +-
 3 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/sendit/apps/main/tasks/finish.py b/sendit/apps/main/tasks/finish.py
index 49aaf67..3d16cc1 100644
--- a/sendit/apps/main/tasks/finish.py
+++ b/sendit/apps/main/tasks/finish.py
@@ -147,8 +147,16 @@ def upload_storage(batch_ids=None):
         if valid is False:
             continue

+        # Add additional shared metadata
         studycode = batch_ids.shared['AccessionNumber']
         coded_mrn = batch_ids.shared['PatientID']
+        batch_ids.shared['CodedPatientID'] = coded_mrn
+        batch_ids.shared['ContentType'] = 'application/gzip'
+        batch_ids.shared['CodedAccessionNumberID'] = studycode
+        batch_ids.shared['NumberOfSeries'] = batch.qa['NumberOfSeries']
+        batch_ids.shared['Series'] = batch.qa['Series']
+        batch_ids.shared['RemovedSeries'] = batch.qa['FlaggedSeries']
+
         timestamp = get_timestamp(batch_ids.shared['StudyDate'],
                                   format = "%Y%m%d")

@@ -174,18 +182,21 @@ def upload_storage(batch_ids=None):
             batch.logs['IMAGE_COUNT'] = len(images)
             batch_ids.save()
             batch.save()
+
         if valid is True:
-            items_metadata = batch_ids.shared
-            items = { compressed_file: items_metadata }
-            cleaned = deepcopy(batch_ids.cleaned)
-            metadata = prepare_entity_metadata(cleaned_ids=cleaned)
+
+            metadata = deepcopy(batch_ids.shared)
+            metadata['DicomHeader'] = json.dumps(metadata)
+            metadata = { compressed_file: metadata }
             bot.log("Uploading %s with %s images to Google Storage %s" %(os.path.basename(compressed_file),
                                                                          len(images),
                                                                          GOOGLE_CLOUD_STORAGE))

             # We only expect to have one entity per batch
             kwargs = {"items":[compressed_file],
                       "table":table,
-                      "metadata": metadata}
+                      "metadata": metadata,
+                      "batch": True} # upload in batches at END

+            # Batch metadata
             upload_dataset(client=client, k=kwargs)

@@ -204,6 +215,10 @@ def upload_storage(batch_ids=None):
     batch.qa['ElapsedTime'] = total_time
     batch.save()

+    # After image upload, metadata can be uploaded in one batch
+    # If this isn't optimal, change "batch" in kwargs to False
+    return client.batch.runInsert(table)
+

 @shared_task
 def clean_up(bid, remove_batch=False):
@@ -241,6 +256,7 @@ def upload_dataset(client, k):
                                  mimetype="application/gzip",
                                  entity_key=ENTITY_ID,
                                  item_key=ITEM_ID,
+                                 batch=k['batch'],
                                  metadata=k['metadata'],
                                  permission="projectPrivate") # default batch=True

diff --git a/sendit/apps/main/tasks/get.py b/sendit/apps/main/tasks/get.py
index d61db3d..45e5709 100644
--- a/sendit/apps/main/tasks/get.py
+++ b/sendit/apps/main/tasks/get.py
@@ -98,7 +98,6 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):
         bot.error('%s is not a directory, skipping.'
                   %dicom_dir)
         return

-    bot.debug("Importing %s, found %s .dcm files" %(dicom_dir,len(dicom_files)))

     # The batch --> the folder with a set of dicoms tied to one request
@@ -108,6 +107,8 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):

     # Data quality check: keep a record of study dates
     study_dates = dict()
+    series = {}
+    all_series = []
     size_bytes = sum(os.path.getsize(f) for f in dicom_files)
     messages = [] # print all unique messages / warnings at end

@@ -121,6 +122,10 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):

             # Keep track of studyDate
             study_date = dcm.get('StudyDate')
+            series_id = dcm.get('SeriesNumber')
+            if series_id not in all_series:
+                all_series.append(series_id)
+
             if study_date not in study_dates:
                 study_dates[study_date] = 0
             study_dates[study_date] += 1
@@ -148,6 +153,18 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):
             dicom = Image.objects.create(batch=batch,
                                          uid=dicom_uid)

+            # Series Number and count of slices (images)
+            if series_id is not None and series_id not in series:
+                series[series_id] = {'SeriesNumber': series_id, 'Images': 1}
+
+                # Series Description
+                description = dcm.get('SeriesDescription')
+                if description is not None:
+                    series[series_id]['SeriesDescription'] = description
+
+            elif series_id is not None:
+                series[series_id]['Images'] += 1
+
             # Save the dicom file to storage
             # basename = "%s/%s" %(batch.id,os.path.basename(dcm_file))
             dicom = save_image_dicom(dicom=dicom,
@@ -181,7 +198,13 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):
                                                            dcm_file)
                 batch = add_batch_error(message,batch)

+    # Which series aren't represented with data?
+    removed_series = [x for x in all_series if x not in list(series.keys())]
+
     # Save batch thus far
+    batch.qa['NumberOfSeries'] = len(series)
+    batch.qa['FlaggedSeries'] = removed_series
+    batch.qa['Series'] = series
     batch.qa['StudyDate'] = study_dates
     batch.qa['StartTime'] = start_time
     batch.qa['SizeBytes'] = size_bytes
@@ -245,7 +268,7 @@ def get_identifiers(bid,study=None,run_replace_identifiers=True):
     # Process all dicoms at once, one call to the API
     dicom_files = batch.get_image_paths()
     batch.status = "PROCESSING"
-    batch.save() # redundant
+    batch.save()

     try:
         ids = get_ids(dicom_files=dicom_files,
diff --git a/sendit/apps/main/tasks/update.py b/sendit/apps/main/tasks/update.py
index 2488322..be3f6d3 100644
--- a/sendit/apps/main/tasks/update.py
+++ b/sendit/apps/main/tasks/update.py
@@ -146,7 +146,7 @@ def replace_identifiers(bid, run_upload_storage=False):
     # Get shared information
     aggregate = ["BodyPartExamined", "Modality", "StudyDescription"]
     shared_ids = get_shared_identifiers(dicom_files=updated_files,
-                                       aggregate=aggregate)
+                                        aggregate=aggregate)
     batch_ids.shared = shared_ids
     batch_ids.save()
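
--
Note on the metadata flow: upload_storage() now attaches the QA fields to
batch_ids.shared, uploads each compressed file with that metadata, and
defers the table insert until the end, flushing everything with a single
client.batch.runInsert(table). A minimal sketch of this queue-then-flush
pattern follows; QueuedClient and its methods are hypothetical stand-ins
for the som storage client, not its actual API:

    # Hypothetical illustration of the deferred ("batch") insert pattern
    # used in upload_storage: rows queue during upload, one flush at the end.
    class QueuedClient:

        def __init__(self):
            self.pending = {}   # table name -> rows waiting to be inserted

        def upload_dataset(self, items, table, metadata, batch=True):
            for item in items:
                row = {"item": item, "metadata": metadata.get(item, {})}
                if batch:
                    # defer: queue the row for one insert at the end
                    self.pending.setdefault(table, []).append(row)
                else:
                    # insert immediately, one call per item
                    self.run_insert(table, [row])

        def run_insert(self, table, rows=None):
            if rows is None:
                rows = self.pending.pop(table, [])
            print("inserting %d rows into %s" % (len(rows), table))
            return rows

    client = QueuedClient()
    for archive in ["batch1.tar.gz", "batch2.tar.gz"]:
        metadata = {archive: {"ContentType": "application/gzip"}}
        client.upload_dataset(items=[archive], table="dicom", metadata=metadata)
    client.run_insert("dicom")   # one insert covering all queued metadata

Deferring trades per-item round trips for a single insert per table; the
cost is that a failure at flush time loses every queued row, which is why
the comment in upload_storage leaves "batch" in kwargs as a switch back to
per-item inserts.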