Skip to content

Commit

Permalink
Merge pull request #242 from woook/new_nextseq
Browse files Browse the repository at this point in the history
New nextseq integrity check
  • Loading branch information
andyb3 authored Jun 12, 2019
2 parents 3f96af3 + 393b308 commit 2164978
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 67 deletions.
22 changes: 11 additions & 11 deletions DNANexus_upload_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,17 +640,17 @@ def create_run_pipeline_command(self):
dest_cmd = self.nexusproject + ":/"

# create the MokaAMP dx command
command = self.mokaamp_command + config.mokaamp_fastq_R1 + read1_cmd + \
config.mokaamp_fastq_R2 + read2_cmd + \
config.mokaamp_mokapicard_input + picard_bedfile + \
config.mokaamp_capturetype + config.mokaamp_capture_type + \
config.mokaamp_bed_PE + mokaamp_bed_PE_input + \
config.mokaamp_cov_level + config.mokaamp_coverage_level + \
config.mokaamp_sambamba_bed + sambamba_bedfile + \
config.mokaamp_vardict_bed + variant_calling_bed + \
config.mokaamp_varscan_bed + variant_calling_bed + \
config.mokaamp_lofreq_bed + variant_calling_bed + \
config.mokaamp_varscan_strandfilter + config.mokaamp_strandfilter + \
command = self.mokaamp_command + config.mokaamp_fastq_R1_stage + read1_cmd + \
config.mokaamp_fastq_R2_stage + read2_cmd + \
config.mokaamp_mokapicard_bed_stage + picard_bedfile + \
config.mokaamp_mokapicard_capturetype_stage + config.mokaamp_capture_type + \
config.mokaamp_bamclipper_BEDPE_stage + mokaamp_bed_PE_input + \
config.mokaamp_chanjo_cov_level_stage + config.mokaamp_coverage_level + \
config.mokaamp_sambamba_bed_stage + sambamba_bedfile + \
config.mokaamp_vardict_bed_stage + variant_calling_bed + \
config.mokaamp_varscan_bed_stage + variant_calling_bed + \
config.mokaamp_lofreq_bed_stage + variant_calling_bed + \
config.mokaamp_varscan_strandfilter_stage + config.mokaamp_strandfilter + \
self.dest + dest_cmd + self.token

# remove the bit that adds the job to the depends-on list for the negative control, as varscan fails on near-empty/empty BAM files and this would stop multiqc etc. from running
Expand Down
37 changes: 14 additions & 23 deletions automate_demultiplex_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
# path to the oncology workflow in the app project
mokaonc_path = "Workflows/Mokaonc_v1.4"
# path to mokaamp
mokaamp_path = "Workflows/MokaAMP_v1.0"
mokaamp_path = "Workflows/MokaAMP_v1.1"
# path to peddy app
peddy_path = "Apps/peddy_v1.3"
# path to multiqc app
Expand Down Expand Up @@ -133,17 +133,17 @@
mokaonc_ingenuity = " -istage-F5k1Qyj0jy1VKJb2KYqq7fxG.email=" # ingenuity app input for amplivar workflow

# MokaAMP
mokaamp_fastq_R1 = " -istage-FPzGj780jy1g3p1F4F8z4J7V.reads_fastqgz="
mokaamp_fastq_R2 = " -istage-FPzGj780jy1g3p1F4F8z4J7V.reads2_fastqgz="
mokaamp_mokapicard_input = " -istage-FPzGjV80jy1x97jg607Fg22b.vendor_exome_bedfile="
mokaamp_capturetype = " -istage-FPzGjV80jy1x97jg607Fg22b.Capture_panel="
mokaamp_bed_PE = " -istage-FPzGjJQ0jy1fF6505zFP6zz9.primers="
mokaamp_cov_level = " -istage-FPzGjfQ0jy1y01vG60K22qG1.coverage_level="
mokaamp_sambamba_bed = " -istage-FPzGjfQ0jy1y01vG60K22qG1.sambamba_bed="
mokaamp_vardict_bed = " -istage-FPzGjgj0jy1Q2JJF2zYx5J5k.bedfile="
mokaamp_varscan_bed = " -istage-FPzGjp80jy1V3Jvb5z6xfpfZ.bed_file="
mokaamp_varscan_strandfilter = " -istage-FPzGjp80jy1V3Jvb5z6xfpfZ.strand_filter="
mokaamp_lofreq_bed = " -istage-FPzGjgQ0jy1fBy972zq9f1PY.bedfile="
mokaamp_fastq_R1_stage = " -istage-FPzGj780jy1g3p1F4F8z4J7V.reads_fastqgz="
mokaamp_fastq_R2_stage = " -istage-FPzGj780jy1g3p1F4F8z4J7V.reads2_fastqgz="
mokaamp_mokapicard_bed_stage = " -istage-FPzGjV80jy1x97jg607Fg22b.vendor_exome_bedfile="
mokaamp_mokapicard_capturetype_stage = " -istage-FPzGjV80jy1x97jg607Fg22b.Capture_panel="
mokaamp_bamclipper_BEDPE_stage = " -istage-FPzGjJQ0jy1fF6505zFP6zz9.primers="
mokaamp_chanjo_cov_level_stage = " -istage-FPzGjfQ0jy1y01vG60K22qG1.coverage_level="
mokaamp_sambamba_bed_stage = " -istage-FPzGjfQ0jy1y01vG60K22qG1.sambamba_bed="
mokaamp_vardict_bed_stage = " -istage-FPzGjgj0jy1Q2JJF2zYx5J5k.bedfile="
mokaamp_varscan_bed_stage = " -istage-FPzGjp80jy1V3Jvb5z6xfpfZ.bed_file="
mokaamp_varscan_strandfilter_stage = " -istage-FPzGjp80jy1V3Jvb5z6xfpfZ.strand_filter="
mokaamp_lofreq_bed_stage = " -istage-FPzGjgQ0jy1fBy972zq9f1PY.bedfile="

mokaamp_strandfilter = "True"
mokaamp_coverage_level = "1000"
Expand Down Expand Up @@ -270,20 +270,11 @@
checksum_complete_flag = "Checksum result reported"
# statement to write when checksums match
checksum_match = "Checksums match"
# hours to wait after RTAcomplete.txt file before first integrity check
integrity_check_first_wait = 3
# hours between integrity checks
integrity_check_repeat_wait = 1
# maximum number of times to perform integrity test
max_number_of_attempts = 10
# list of files which differ between temp and output
missing_files_output = "missing_files.txt"
# files to exclude from integrity check
exclude = ["RTAStart.bat", "CorrectedIntMetrics.bin", "EmpiricalPhasingMetrics.bin", "ErrorMetrics.bin", "EventMetrics.bin", "ExtractionMetrics.bin", "PFGridMetrics.bin", "QMetrics.bin", "RegistrationMetrics.bin", "TileMetrics.bin", "000_000_000_na_rtabat.trans", "FilesAdded.csv", "FilesCopied.csv", "md5checksum.txt", missing_files_output]

# ================ demultiplexing
logfile_success = "Processing completed with 0 errors and 0 warnings."

# list of sequencers which require md5 checksums from integrity check to be assessed
sequencers_with_integrity_check = ["NB551068", "NB552085"]
# =================turnaround time
# if a task takes more than this amount of time it is out of TAT
allowed_time_for_tasks = 4
74 changes: 41 additions & 33 deletions demultiplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ def logger(self, message, tool):
def prepare_integrity_check(self):
"""
We want to ensure the runfolder which was copied to the workstation hasn't been corrupted by the transfer.
This is only possible for the NextSeq.
This is not possible on all sequencers (e.g. MiSeq).
Checksums are generated by a script running on the sequencer and these are assessed by this script.
For nextseq runs the presence of the file containing the checksums is assessed.
Expand All @@ -586,20 +586,29 @@ def prepare_integrity_check(self):
# write to log file to say integrity checking is being performed
self.script_logfile.write("Data integrity checks starting...\n")

# if it's not a nextseq run return true to continue without integrity check
if "NB551068" not in self.runfolder:
self.script_logfile.write("MiSeq run identified. Integrity test not possible.\n")
# return True to report integrity checking has passed
# flag to determine if integrity check is required
integrity_check_required = False
# Is this run from a sequencer with integrity checking?
for sequencer in config.sequencers_with_integrity_check:
# if it is, set the flag to True
if sequencer in self.runfolder:
integrity_check_required = True

# if integrity checking is not required
if not integrity_check_required:
# write to log file to say integrity checking is not required
self.script_logfile.write("Data integrity check not required...continuing\n")
# return True to process, skipping integrity check
return True

# now have determined is a NextSeq run set the path to the checksum file
# now have determined checksums need to be checked
# checksum file should have been written to the runfolder on the workstation by sequencer_checksum.py
checksum_file_path = os.path.join(self.runfolderpath, config.md5checksum_name)

# if the integrity check hasn't been performed yet there won't be a checksum file. If there isn't one, return False to skip this run until the integrity test has been performed
if not os.path.isfile(checksum_file_path):
# write to log file
self.script_logfile.write("Integrity check not yet performed on NextSeq. stopping....\n")
self.script_logfile.write("Integrity check not yet performed on sequencer. stopping....\n")
# and return false to stop the script
return False

Expand All @@ -609,34 +618,33 @@ def prepare_integrity_check(self):
with open(checksum_file_path, 'r') as checksum_file:
# read the checksum file into a list
checksums = checksum_file.readlines()
# assess last line in file (last element in list) to see if the flag which denotes checksum test has already been performed is present.
if config.checksum_complete_flag in checksums[-1]:
self.script_logfile.write("Previously reported failed integrity check\n")
# return false to report integrity check not passed
return False

# assess last line in file (last element in list) to see if the flag which denotes checksum test has already been performed is present.
if config.checksum_complete_flag in checksums[-1]:
self.script_logfile.write("Previously reported failed integrity check\n")
# return false to report integrity check not passed
return False

# if the integrity check result has not yet been assessed...
else:
# pass checksum file path to function which determines if integrity check passed. will return true if the integrity check passed
if self.check_checksums(checksum_file_path):
# write to sys log
self.logger("integrity check of runfolder " + self.runfolder + " passed", "demultiplex_success")
# return True to report integrity checking has passed
return True
# if integrity check failed...
# if the integrity check result has not yet been assessed...
else:
# if it's not a debug run
if not config.debug:
# send an email
self.email_subject = "MOKAPIPE ALERT: INTEGRITY CHECK FAILED"
self.email_priority = 1
self.email_message = "run:\t" + self.runfolder + "\nPlease follow the protocol for when integrity checks fail"
self.send_an_email()
# record test failed in sys log
self.logger("Integrity check fail. checksums do not match for " + self.runfolder + "see " + checksum_file_path, "demultiplex_fail")
# return false to stop the script, saying integrity checking has not been completed
return False
# pass checksum file path to function which determines if integrity check passed. will return true if the integrity check passed
if self.check_checksums(checksum_file_path):
# write to sys log
self.logger("integrity check of runfolder " + self.runfolder + " passed", "demultiplex_success")
# return True to report integrity checking has passed
return True
# if integrity check failed...
else:
# if it's not a debug run
if not config.debug:
# send an email
self.email_subject = "MOKAPIPE ALERT: INTEGRITY CHECK FAILED"
self.email_priority = 1
self.email_message = "run:\t" + self.runfolder + "\nPlease follow the protocol for when integrity checks fail"
self.send_an_email()
# record test failed in sys log
self.logger("Integrity check fail. checksums do not match for " + self.runfolder + "see " + checksum_file_path, "demultiplex_fail")
# return false to stop the script, saying integrity checking has not been completed
return False

def check_checksums(self, checksum_file_path):
"""
Expand Down

0 comments on commit 2164978

Please sign in to comment.