From 6c0742e2ce7bd5ca824b3c8cdc663005745243e9 Mon Sep 17 00:00:00 2001
From: vsoch
Date: Mon, 30 Oct 2017 16:30:29 -0700
Subject: [PATCH] changes

---
 sendit/apps/main/tasks/finish.py | 26 +++++++++++++++++++++-----
 sendit/apps/main/tasks/get.py    | 27 +++++++++++++++++++++++++--
 sendit/apps/main/tasks/update.py |  2 +-
 3 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/sendit/apps/main/tasks/finish.py b/sendit/apps/main/tasks/finish.py
index 49aaf67..3d16cc1 100644
--- a/sendit/apps/main/tasks/finish.py
+++ b/sendit/apps/main/tasks/finish.py
@@ -147,8 +147,16 @@ def upload_storage(batch_ids=None):
         if valid is False:
             continue

+        # Add additional shared metadata
         studycode = batch_ids.shared['AccessionNumber']
         coded_mrn = batch_ids.shared['PatientID']
+        batch_ids.shared['CodedPatientID'] = coded_mrn
+        batch_ids.shared['ContentType'] = 'application/gzip'
+        batch_ids.shared['CodedAccessionNumberID'] = studycode
+        batch_ids.shared['NumberOfSeries'] = batch.qa['NumberOfSeries']
+        batch_ids.shared['Series'] = batch.qa['Series']
+        batch_ids.shared['RemovedSeries'] = batch.qa['FlaggedSeries']
+
         timestamp = get_timestamp(batch_ids.shared['StudyDate'],
                                   format = "%Y%m%d")

@@ -174,18 +182,21 @@ def upload_storage(batch_ids=None):
             batch.logs['IMAGE_COUNT'] = len(images)
             batch_ids.save()
             batch.save()
+
         if valid is True:
-            items_metadata = batch_ids.shared
-            items = { compressed_file: items_metadata }
-            cleaned = deepcopy(batch_ids.cleaned)
-            metadata = prepare_entity_metadata(cleaned_ids=cleaned)
+
+            metadata = deepcopy(batch_ids.shared)
+            metadata['DicomHeader'] = json.dumps(metadata)
+            metadata = { compressed_file: metadata }
             bot.log("Uploading %s with %s images to Google Storage %s" %(os.path.basename(compressed_file),
                                                                          len(images),
                                                                          GOOGLE_CLOUD_STORAGE))

             # We only expect to have one entity per batch
             kwargs = {"items":[compressed_file],
                       "table":table,
-                      "metadata": metadata}
+                      "metadata": metadata,
+                      "batch": True} # upload in batches at END

+            # Batch metadata
             upload_dataset(client=client, k=kwargs)

@@ -204,6 +215,10 @@ def upload_storage(batch_ids=None):
     batch.qa['ElapsedTime'] = total_time
     batch.save()

+    # After image upload, metadata can be uploaded in one batch
+    # If this isn't optimal, change "batch" in kwargs to False
+    return client.batch.runInsert(table)
+

 @shared_task
 def clean_up(bid, remove_batch=False):
@@ -241,6 +256,7 @@ def upload_dataset(client, k):
                                  mimetype="application/gzip",
                                  entity_key=ENTITY_ID,
                                  item_key=ITEM_ID,
+                                 batch=k['batch'],
                                  metadata=k['metadata'],
                                  permission="projectPrivate") # default batch=True

diff --git a/sendit/apps/main/tasks/get.py b/sendit/apps/main/tasks/get.py
index d61db3d..45e5709 100644
--- a/sendit/apps/main/tasks/get.py
+++ b/sendit/apps/main/tasks/get.py
@@ -98,7 +98,6 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):
         bot.error('%s is not a directory, skipping.'
                   %dicom_dir)
         return

-    bot.debug("Importing %s, found %s .dcm files" %(dicom_dir,len(dicom_files)))

     # The batch --> the folder with a set of dicoms tied to one request
@@ -108,6 +107,8 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):

     # Data quality check: keep a record of study dates
     study_dates = dict()
+    series = {}
+    all_series = []
     size_bytes = sum(os.path.getsize(f) for f in dicom_files)
     messages = [] # print all unique messages / warnings at end

@@ -121,6 +122,10 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):

             # Keep track of studyDate
             study_date = dcm.get('StudyDate')
+            series_id = dcm.get('SeriesNumber')
+            if series_id not in all_series:
+                all_series.append(series_id)
+
             if study_date not in study_dates:
                 study_dates[study_date] = 0
             study_dates[study_date] += 1
@@ -148,6 +153,18 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):
             dicom = Image.objects.create(batch=batch,
                                          uid=dicom_uid)

+            # Series Number and count of slices (images)
+            if series_id is not None and series_id not in series:
+                series[series_id] = {'SeriesNumber': series_id, 'Images': 1}
+
+                # Series Description
+                description = dcm.get('SeriesDescription')
+                if description is not None:
+                    series[series_id]['SeriesDescription'] = description
+
+            elif series_id is not None:
+                series[series_id]['Images'] += 1
+
             # Save the dicom file to storage
             # basename = "%s/%s" %(batch.id,os.path.basename(dcm_file))
             dicom = save_image_dicom(dicom=dicom,
@@ -181,7 +198,13 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):
                                                            dcm_file)
                 batch = add_batch_error(message,batch)

+    # Which series aren't represented with data?
+    removed_series = [x for x in all_series if x not in list(series.keys())]
+
     # Save batch thus far
+    batch.qa['NumberOfSeries'] = len(series)
+    batch.qa['FlaggedSeries'] = removed_series
+    batch.qa['Series'] = series
     batch.qa['StudyDate'] = study_dates
     batch.qa['StartTime'] = start_time
     batch.qa['SizeBytes'] = size_bytes
@@ -245,7 +268,7 @@ def get_identifiers(bid,study=None,run_replace_identifiers=True):
     # Process all dicoms at once, one call to the API
     dicom_files = batch.get_image_paths()
     batch.status = "PROCESSING"
-    batch.save() # redundant
+    batch.save()

     try:
         ids = get_ids(dicom_files=dicom_files,
diff --git a/sendit/apps/main/tasks/update.py b/sendit/apps/main/tasks/update.py
index 2488322..be3f6d3 100644
--- a/sendit/apps/main/tasks/update.py
+++ b/sendit/apps/main/tasks/update.py
@@ -146,7 +146,7 @@ def replace_identifiers(bid, run_upload_storage=False):
     # Get shared information
     aggregate = ["BodyPartExamined", "Modality", "StudyDescription"]
     shared_ids = get_shared_identifiers(dicom_files=updated_files,
-                                       aggregate=aggregate)
+                                        aggregate=aggregate)
     batch_ids.shared = shared_ids
     batch_ids.save()
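
--
Note on the metadata flow: upload_storage() now attaches the QA fields to
batch_ids.shared, uploads each compressed file with that metadata, and
defers the table insert until the end, flushing everything with a single
client.batch.runInsert(table). A minimal sketch of this queue-then-flush
pattern follows; QueuedClient and its methods are hypothetical stand-ins
for the som storage client, not its actual API:

    # Hypothetical illustration of the deferred ("batch") insert pattern
    # used in upload_storage: rows queue during upload, one flush at the end.
    class QueuedClient:

        def __init__(self):
            self.pending = {}   # table name -> rows waiting to be inserted

        def upload_dataset(self, items, table, metadata, batch=True):
            for item in items:
                row = {"item": item, "metadata": metadata.get(item, {})}
                if batch:
                    # defer: queue the row for one insert at the end
                    self.pending.setdefault(table, []).append(row)
                else:
                    # insert immediately, one call per item
                    self.run_insert(table, [row])

        def run_insert(self, table, rows=None):
            if rows is None:
                rows = self.pending.pop(table, [])
            print("inserting %d rows into %s" % (len(rows), table))
            return rows

    client = QueuedClient()
    for archive in ["batch1.tar.gz", "batch2.tar.gz"]:
        metadata = {archive: {"ContentType": "application/gzip"}}
        client.upload_dataset(items=[archive], table="dicom", metadata=metadata)
    client.run_insert("dicom")   # one insert covering all queued metadata

Deferring trades per-item round trips for a single insert per table; the
cost is that a failure at flush time loses every queued row, which is why
the comment in upload_storage leaves "batch" in kwargs as a switch back to
per-item inserts.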