Skip to content

Commit

Permalink
changes
Browse files Browse the repository at this point in the history
  • Loading branch information
vsoch committed Oct 30, 2017
1 parent 130a7b7 commit 6c0742e
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 8 deletions.
26 changes: 21 additions & 5 deletions sendit/apps/main/tasks/finish.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,16 @@ def upload_storage(batch_ids=None):
if valid is False:
continue

# Add additional shared metadata
studycode = batch_ids.shared['AccessionNumber']
coded_mrn = batch_ids.shared['PatientID']
batch_ids.shared['CodedPatientID'] = coded_mrn
batch_ids.shared['ContentType'] = 'application/gzip'
batch_ids.shared['CodedAccessionNumberID'] = studycode
batch_ids.shared['NumberOfSeries'] = batch.qa['NumberOfSeries']
batch_ids.shared['Series'] = batch.qa['Series']
batch_ids.shared['RemovedSeries'] = batch.qa['FlaggedSeries']

timestamp = get_timestamp(batch_ids.shared['StudyDate'],
format = "%Y%m%d")

Expand All @@ -174,18 +182,21 @@ def upload_storage(batch_ids=None):
batch.logs['IMAGE_COUNT'] = len(images)
batch_ids.save()
batch.save()

if valid is True:
items_metadata = batch_ids.shared
items = { compressed_file: items_metadata }
cleaned = deepcopy(batch_ids.cleaned)
metadata = prepare_entity_metadata(cleaned_ids=cleaned)

metadata = deepcopy(batch_ids.shared)
metadata['DicomHeader'] = json.dumps(metadata)
metadata = { compressed_file: metadata }
bot.log("Uploading %s with %s images to Google Storage %s" %(os.path.basename(compressed_file),
len(images),
GOOGLE_CLOUD_STORAGE))
# We only expect to have one entity per batch
kwargs = {"items":[compressed_file],
"table":table,
"metadata": metadata}
"metadata": metadata,
"batch": False} # upload in batches at END

# Batch metadata
upload_dataset(client=client, k=kwargs)

Expand All @@ -204,6 +215,10 @@ def upload_storage(batch_ids=None):
batch.qa['ElapsedTime'] = total_time
batch.save()

# After image upload, metadata can be uploaded on one batch
# If this isn't optimal, change "batch" in kwargs to False
return client.batch.runInsert(table)


@shared_task
def clean_up(bid, remove_batch=False):
Expand Down Expand Up @@ -241,6 +256,7 @@ def upload_dataset(client, k):
mimetype="application/gzip",
entity_key=ENTITY_ID,
item_key=ITEM_ID,
batch=k['batch'],
metadata=k['metadata'],
permission="projectPrivate") # default batch=True

Expand Down
27 changes: 25 additions & 2 deletions sendit/apps/main/tasks/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):
bot.error('%s is not a directory, skipping.' %dicom_dir)
return


bot.debug("Importing %s, found %s .dcm files" %(dicom_dir,len(dicom_files)))

# The batch --> the folder with a set of dicoms tied to one request
Expand All @@ -108,6 +107,8 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):

# Data quality check: keep a record of study dates
study_dates = dict()
series = {}
all_series = []
size_bytes = sum(os.path.getsize(f) for f in dicom_files)
messages = [] # print all unique messages / warnings at end

Expand All @@ -121,6 +122,10 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):

# Keep track of studyDate
study_date = dcm.get('StudyDate')
series_id = dcm.get('SeriesNumber')
if series_id not in all_series:
all_series.append(series_id)

if study_date not in study_dates:
study_dates[study_date] = 0
study_dates[study_date] += 1
Expand Down Expand Up @@ -148,6 +153,18 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):
dicom = Image.objects.create(batch=batch,
uid=dicom_uid)

# Series Number and count of slices (images)
if series_id is not None:
    if series_id not in series:
        series[series_id] = {'SeriesNumber': series_id, 'Images': 1}

        # Series Description
        description = dcm.get('SeriesDescription')
        if description is not None:
            series[series_id]['SeriesDescription'] = description

    else:
        series[series_id]['Images'] += 1

# Save the dicom file to storage
# basename = "%s/%s" %(batch.id,os.path.basename(dcm_file))
dicom = save_image_dicom(dicom=dicom,
Expand Down Expand Up @@ -181,7 +198,13 @@ def import_dicomdir(dicom_dir, run_get_identifiers=True):
dcm_file)
batch = add_batch_error(message,batch)

# Which series aren't represented with data?
removed_series = [x for x in all_series if x not in list(series.keys())]

# Save batch thus far
batch.qa['NumberOfSeries'] = len(series)
batch.qa['FlaggedSeries'] = removed_series
batch.qa['Series'] = series
batch.qa['StudyDate'] = study_dates
batch.qa['StartTime'] = start_time
batch.qa['SizeBytes'] = size_bytes
Expand Down Expand Up @@ -245,7 +268,7 @@ def get_identifiers(bid,study=None,run_replace_identifiers=True):
# Process all dicoms at once, one call to the API
dicom_files = batch.get_image_paths()
batch.status = "PROCESSING"
batch.save() # redundant
batch.save()

try:
ids = get_ids(dicom_files=dicom_files,
Expand Down
2 changes: 1 addition & 1 deletion sendit/apps/main/tasks/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def replace_identifiers(bid, run_upload_storage=False):
# Get shared information
aggregate = ["BodyPartExamined", "Modality", "StudyDescription"]
shared_ids = get_shared_identifiers(dicom_files=updated_files,
aggregate=aggregate)
aggregate=aggregate)
batch_ids.shared = shared_ids
batch_ids.save()

Expand Down

0 comments on commit 6c0742e

Please sign in to comment.