From bdee6d1bc94aae63c2751106893c55a55a57008e Mon Sep 17 00:00:00 2001 From: Aledj2 Date: Mon, 3 Sep 2018 15:41:05 +0100 Subject: [PATCH 1/4] multiqc_v1.5 to v1.6 --- automate_demultiplex_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index 5a85f39f..6880c989 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -9,7 +9,7 @@ debug = False # =====git release for the automate_demultiplexing repo===== -script_release = "v20.0" +script_release = "v21.0" # =====location of input/output files===== # path to run folders @@ -84,7 +84,7 @@ # path to paddy app peddy_path = "Apps/peddy_v1.2" # path to multiqc app -multiqc_path = "Apps/multiqc_v1.5" +multiqc_path = "Apps/multiqc_v1.6" # smartsheet app smartsheet_path = "Apps/smartsheet_mokapipe_complete_v1.1" # RPKM path From 05001b2d9b9b24ff93e72935864f6891f84710f4 Mon Sep 17 00:00:00 2001 From: Aledj2 Date: Tue, 4 Sep 2018 14:58:34 +0100 Subject: [PATCH 2/4] Deleted integrity_test.xml from repo --- integrity_test.xml | Bin 3752 -> 0 bytes sequencer_checksum.py | 328 ------------------------------------------ 2 files changed, 328 deletions(-) delete mode 100644 integrity_test.xml delete mode 100644 sequencer_checksum.py diff --git a/integrity_test.xml b/integrity_test.xml deleted file mode 100644 index 3ce28de8acc40d301f5963739da23c950770166e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3752 zcmbW4S#J|d5QXa*iT@DsezTZMAdxZ328k5b7{P-jWa8jNu_wXfY=0j3PPIFpo*6p^ zS(Bc0SDo5USNHt>)wEZ3YeO5@l3m-0jV!k_yRgh&+7nx|Wm{!jVkNU<=6kGW*7w!V z*0$I7A5!bcyt0-(wnx@vwhPMnOvHvG2CEs8vUF-=ewVgsEBtb7onmw7NXv|8%#5&~ z^ZY<^*}WMed*dGUn7?6l#Tt0LYX9@L%KM7zjv2=jubxc&T*AMmeZoo~gq-ye5@X&K z{Q=Jp-a<^04f4A#bHcprtl4vqzro*zZ4kL~vFY|V?U}v6&vkrxhTI1KZD4W5rWnfC z6no3Auz!lx5%_QM;mG#vqhqhzQ?NVu`JLyAN3{}KsudBnvUqH&*@(#d`~nBcaqkfK zfVB(;bdgQvQ|XW?S-<2_Mq^g1azR$v9=qjhx6`r@p38lYu(}$fjJXRaM_^oFbBl48 z92M)i=bpEysp3%fl%Yj5%C=%=AC9f*_{!GUw(;!hK6`gF`6KR%Kk;w$au!O}J4M=s zC8|~rJL2`#B;N8`Rr_eN^LpqAVcYJUukyGo9kMPq-;vR3mDcXrUcX+(3$ccs|B!vj zF3L&CrhLr{(KO+jxUc?p273ZO(rD{smXPjw?<(%VVt0w`Y})6^em(Gtykl2?#iOvh z>-KaiU(M4YZ0hr5pxpT`YZda}B@*p8_477T=Zu-xqE4@h{>hnoh?WorElVR zu0a<&)Gd_Vszwo264e|}GhDx(*60XJIw4f2KAudq0}J0eU$uVeo{5d}x662pZjJ{R ztmqsOmk&V6m>IG5j_1bX_*;}T%3aWN=g$D-o5JotpzplL-Zra>LOu4(y-N4zzagYv z#GWRbZ}1~zY!T~!r)g;G}QPZl+hh4nVy-+uiS&W*ek@{0U5sTR> zucey%u&&2Xy|tW=bS>4ZG9zZBj51F$#r#4()aBLfgH{=bRJEY9pKi+CxH9AAasw)%mC=OS73GmV~qJf-2q_%EzsfonV5%sPIO>Gaj@a_Dy>^`s8>b#*$OFY-XQcb% z8Qo)Cw)yF!;ubt zsltjZbmec(;bH0gymjOg`^mhl>kJ)P5lLSmLn2ayil@%oVoXKOOtT#{i!}^uns4Kw Iz9U=q2X32LQvd(} diff --git a/sequencer_checksum.py b/sequencer_checksum.py deleted file mode 100644 index 56d4b140..00000000 --- a/sequencer_checksum.py +++ /dev/null @@ -1,328 +0,0 @@ -from checksumdir import dirhash # package to calculate checksums -import os # package to use and manipulate file paths -import datetime # for timestamp -import time # for sleep -import Tkinter # to open a window/message box -import tkMessageBox -import threading # to run a function in the background -import automate_demultiplex_config as config - -class Nextseq_Integrity_Check(): - def __init__(self): - - # temp folder on the nextseq - self.nextseqtemp_folder = "D:\\Illumina\\NextSeq Control Software Temp" - - # path to the mapped workstation share - self.mapped_workstation_folder = "Z:\\" - - # the filename which denote sequencing has finished - self.RTA_complete = "RTAComplete.txt" - - # the file to write the checksums to - self.output_file = "md5checksum.txt" - - # folder containing files which denote checksum is being calculated - self.checksum_in_progress = "C:\\Users\\sbsuser\\integrity_check\\checksums_inprogress" - - # folder containing files which denote checksum is being calculated - self.run_in_progress = "C:\\Users\\sbsuser\\integrity_check\\run_inprogress" - - # variable to hold the name of the runfolder - self.runfolder = "" - - # variables for the runfolder paths - self.workstation_runfolder = "" - self.sequencer_runfolder = "" - - # checksums match - self.checksum_match = False - - # if testing, overwrite the paths to that of the testing folders (currently on a USB stick) - if config.debug: - # drive letter given to usb stick - self.mapped_drive = "E:\\" - # path to the fake nextseqtemp folder - self.nextseqtemp_folder = self.mapped_drive + "integrity_testing\\sequencer_temp" - # path to the fake workstation folder - self.mapped_workstation_folder = self.mapped_drive + "integrity_testing\\workstation" - # path to the fake checksums_inprogress folder - self.checksum_in_progress = self.mapped_drive + "integrity_testing\\checksums_inprogress" - # path to the fake run in progress folder - self.run_in_progress = self.mapped_drive + "integrity_testing\\run_inprogress" - - def look_for_folder(self): - """ - This script runs every hour. - The script needs to detect when a run has started, and display a window which remains until the integrity test has been performed. - Display a window to say not to do anything until sequencing is complete and integrity checks done. - When checksums are done, display a message box displaying pass/fail messages. - """ - - # for each runfolder in temp folder - for temp_runfolder in os.listdir(self.nextseqtemp_folder): - # if the run has not already been monitored by this script OR it's a testing run - if temp_runfolder not in os.listdir(self.run_in_progress) or config.debug: - # if testing print message - if config.debug: - print "testing run skipping test to see if run already being monitored" - - - # assign run folder name - self.runfolder = temp_runfolder - # create a file to denote this run is being monitored - with open(os.path.join(self.run_in_progress,temp_runfolder),'w') as new_run_marker: - # write timestamp to file - new_run_marker.write(str(datetime.datetime.now())) - - # call function which opens a window to say run in progress - don't do anything until a message box appears denoting integrity check has been performed - # this function will close when the run ends and the checksum has been calculated - self.open_window() - - # call function to assess result of checksum and display message box - # if checksums match (integrity test pass) return a info box - if self.checksum_match: - # create root window which can then be hidden - root = Tkinter.Tk() - # hide - root.withdraw() - tkMessageBox.showinfo("Integrity check complete","Integrity check passed") - # if checksums don't match (integrity test FAIL) return a error box - else: - # create root window which can then be hidden - root = Tkinter.Tk() - # hide - root.withdraw() - tkMessageBox.showerror("Integrity check complete","Integrity check failed - please do not use this sequencer and inform the Bioinformatics team immediately") - - - def open_window(self): - """ - This function uses TKinter to create a window which remains until a process has finished. - This process is complete when the run has finished and checksums have been calculated. - The window closes and is replaced by the info or error in look_for_folder boxes. - """ - # create a object for pop up box - window = Tkinter.Tk() - # set some properties of the message box - # message box size - window.minsize(width=666,height=66) - # message box title - window.title("Integrity check not complete - please wait") - # create a label for inside the message box - label = Tkinter.Label(window, text = "Please don't use this sequencer or close this window until a message box stating \"Integrity check passed\" is displayed") - # display the label in the window - label.pack() - # using threading run the function run_has_finished which closes when the checksums have been generated - thread = threading.Thread(target = self.run_has_finished) - # start parallel computation - thread.start() - # montior this thread - while thread.is_alive(): - # update the window - window.update() - time.sleep(5) - #close this window then all checksums are present. - window.destroy() - - - def run_has_finished(self): - """ - This function looks at the runfolder, assesses if the run has finished and the data transferred. - If required the checksums are generated, or if not the script waits until the checksums have been generated (by the demultiplexing script). - """ - # build path to the runfolder - self.sequencer_runfolder = os.path.join(self.nextseqtemp_folder, self.runfolder) - # build paths on the workstation - self.workstation_runfolder = os.path.join(self.mapped_workstation_folder, self.runfolder) - #flag to denote run and data transfer has finished - finished = False - # while variable finished is false - while not finished: - # check the run has finished and transferred (presence of RTA_complete in the runfolder and on workstation) - if self.RTA_complete in os.listdir(self.sequencer_runfolder) and self.RTA_complete in os.listdir(self.workstation_runfolder): - # if it's a testing run print a message - if config.debug: - print "run finished - skipping integrity_check_first_wait" - else: - # wait the number of hours defined in config file to ensure all file transfers are done - time.sleep(config.integrity_check_first_wait * 3600) - - # call function which triggers the checksum calculations - self.prepare_checksum_calculations() - # now all checksums are done change flag to true so the loop finishes and the window is closed - finished = True - - # if run has not finished - else: - # if a testing run, wait 20 seconds and print a message - if config.debug: - print "waiting 20 seconds for sequencing and data transfer to finish" - time.sleep(20) - # if not testing wait longer - else: - # wait 10 minutes - time.sleep(600) - if config.debug: - print "checksums done" - - - def prepare_checksum_calculations(self): - """ - The checksums are calculated by this script. - This function checks the runfolder has not already been checksummed, marks the folder as being checksummed and then calls the function to generate the checksums. - """ - if config.debug: - print "in prepare_checksum_calculations" - # create name for file to denote checksum in progress - checksum_in_progress_file=self.runfolder+".txt" - # check integrity check has not already been calculated, or isn't currently being calculated and it isn't a testing run. - if not config.debug and self.output_file not in os.listdir(self.workstation_runfolder) and checksum_in_progress_file not in os.listdir(self.checksum_in_progress): - # create a file to denote checksum in progress - with open(os.path.join(self.checksum_in_progress,checksum_in_progress_file),'w') as checksum_in_progress_file_path: - # create a timestamp - now=datetime.datetime.now() - # convert timestamp to string and write to file. - checksum_in_progress_file_path.write(str(now)) - - # call function to generate checksum for workstation and sequencer runfolders - self.run_integrity_check() - # if a test run print statement to explain stopping - elif config.debug: - print "checksums already generated but as testing continuing anyway" - # create a file to denote checksum in progress - with open(os.path.join(self.checksum_in_progress,checksum_in_progress_file),'w') as checksum_in_progress_file_path: - # create a timestamp - now=datetime.datetime.now() - # convert timestamp to string and write to file. - checksum_in_progress_file_path.write(str(now)) - - # call function to generate checksum for workstation and sequencer runfolders - self.run_integrity_check() - - - def run_integrity_check(self): - """ - This function calculates the checksums. - If the checksums do not match it repeats the test until it passes or until the maximum number of attempts is reached - It looks for the presense of any files which should be ignored as they are not copied from temp to output. - The checksums are written to a file on the workstation for the demultiplexing script. - """ - if config.debug: - print "starting integrity checking" - - # set a count for max number of attempts at checksum (one test per hour) - count = 0 - - # while the integrity test is failing and not exceeded the max number of attempts - while not self.checksum_match and count < config.max_number_of_attempts: - # calculate the md5 checksum, using the to_exclude list - workstation_checksum = dirhash(self.workstation_runfolder, 'md5', excluded_files = config.exclude) - sequencer_checksum = dirhash(self.sequencer_runfolder, 'md5', excluded_files = config.exclude) - - # if testing print checksums - if config.debug: - print "workstation checksum = " + workstation_checksum - print "sequencer checksum = " + sequencer_checksum - - # see if the checksums match - if workstation_checksum == sequencer_checksum: - # if they do set self.checksum_match to exit the while loop - self.checksum_match = True - # increase count - count += 1 - - # if checksums fail - else: - # increase count - count += 1 - - # if testing skip the wait - if config.debug: - print "waiting 15 seconds... change the runfolder now!" - time.sleep(15) - else: - # wait the number of hours defined in config file - time.sleep(config.integrity_check_repeat_wait * 3600) - - # report if integrity test has passed or failed after max number of tries - # if failed - if not self.checksum_match: - # write the checksums to the output file (on workstation) - with open(os.path.join(self.workstation_runfolder, self.output_file), 'w') as outputfile: - # record that it failed, with the number of hours - outputfile.write("Checksums do not match after " + str(config.max_number_of_attempts) + " hours\n") - # record the checksums - outputfile.write("workstation checksum (" + self.workstation_runfolder + ")=" + workstation_checksum + "\n") - outputfile.write("sequencer checksum (" + self.sequencer_runfolder + ")=" + sequencer_checksum + "\n") - # call function to identify any files which differ between output and temp - self.identify_missing_files() - - # if test passed - else: - # write the checksums to the output file (on workstation) - with open(os.path.join(self.workstation_runfolder, self.output_file), 'w') as outputfile: - # record that it passed with the number of hours it took - outputfile.write(config.checksum_match +" after "+ str(count) + " hours\n") - # record checksums - outputfile.write("workstation checksum (" + self.workstation_runfolder + ")=" + workstation_checksum + "\n") - outputfile.write("sequencer checksum (" + self.sequencer_runfolder + ")=" + sequencer_checksum + "\n") - - - def identify_missing_files(self): - """ - Loop through the temp folder and if there are any files NOT on the workstation identify them - repeat - looking for any files on workstation that aren't on the sequencer - """ - #create output file - with open(os.path.join(self.workstation_runfolder, config.missing_files_output), 'w') as outputfile: - - # set flag so header only reported first time - workstation_missing = False - # loop through the tempfolder - for root, subfolder, files in os.walk(os.path.join(self.nextseqtemp_folder, self.runfolder)): - # for each file in the list of files in that folder - for file in files: - # set the path of each file - path = os.path.join(root,file) - # create the equivelant path on the workstation - ws_path = path.replace(self.nextseqtemp_folder,self.mapped_workstation_folder) - # if the file doesn't exist and it's not a file already identified as not expected on both folders - if not os.path.isfile(ws_path) and file not in config.exclude: - # if it's the first missing file we've seen - if not workstation_missing: - # print header message - outputfile.write("Missing from Workstation\n") - # set flag so not printed again - workstation_missing = True - # print the path to the extra file - outputfile.write(path) - - # repeat looking for files on workstation that aren't on sequencer - sequencer_missing = False - # loop through all files on workstation runfolder - for root, subfolder, files in os.walk(os.path.join(self.mapped_workstation_folder,self.runfolder)): - # for each file - for file in files: - # set path on workstation - path = os.path.join(root,file) - # replace the path on workstation with the expected sequencer path - sequencer_file_path = path.replace(self.mapped_workstation_folder, self.nextseqtemp_folder) - # if this file doesn't exist - if not os.path.isfile(sequencer_file_path) and file not in config.exclude: - # check if header not already printed - if not sequencer_missing: - # print header - outputfile.write("missing from Nextseq") - # set flag so not printed again - sequencer_missing = True - # print the path to the extra file - outputfile.write(path) - - -def main(): - md5=Nextseq_Integrity_Check() - md5.look_for_folder() - -if __name__ =="__main__": - main() \ No newline at end of file From 8050090743850e6ec84e6ab0efb9fbbf2b7c41aa Mon Sep 17 00:00:00 2001 From: Aled Jones Date: Wed, 5 Sep 2018 17:03:00 +0100 Subject: [PATCH 3/4] Delete crontab.txt moved to seperate repo --- crontab.txt | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 crontab.txt diff --git a/crontab.txt b/crontab.txt deleted file mode 100644 index 200824c1..00000000 --- a/crontab.txt +++ /dev/null @@ -1,32 +0,0 @@ -# Edit this file to introduce tasks to be run by cron. -# -# Each task to run has to be defined through a single line -# indicating with different fields when the task will be run -# and what command to run for the task -# -# To define the time you can provide concrete values for -# minute (m), hour (h), day of month (dom), month (mon), -# and day of week (dow) or use '*' in these fields (for 'any').# -# Notice that tasks will be started based on the cron's system -# daemon's notion of time and timezones. -# -# Output of the crontab jobs (including errors) is sent through -# email to the user the crontab file belongs to (unless redirected). -# -# For example, you can run a backup of all your user accounts -# at 5 a.m every week with: -# 0 5 * * 1 tar -zcf /var/backups/home.tgz /home/ -# -# For more information see the manual pages of crontab(5) and cron(8) -# -# m h dom mon dow command -# Demultiplexing -0 * * * * python /home/mokaguys/Documents/apps/automate_demultiplex/demultiplex.py > /home/mokaguys/Documents/automate_demultiplexing_logfiles/Demultiplexing_stdout/$(date "+\%Y\%m\%d_\%H\%M\%S").txt 2>&1 -# Upload Agent -5 * * * * python /home/mokaguys/Documents/apps/automate_demultiplex/DNANexus_upload_agent.py > /home/mokaguys/Documents/automate_demultiplexing_logfiles/Upload_agent_stdout/$(date "+\%Y\%m\%d_\%H\%M\%S").txt 2>&1 -# Workstation heartbeat -*/20 * * * * echo 'padam padam' 2>&1 | /usr/bin/logger -t Heartbeat -# Low space on workstation warning -0 * * * * FREE=$(df /media/data1 --output=avail | tail -n 1 );FREEHR=$(df /media/data1 --output=avail -h | tail -n 1); if [ $FREE -lt 838860371 ]; then echo "Less than 800GB on data1 ("$FREEHR"B)"; fi | /usr/bin/logger -t data1_freespace -# DNANexus platform integrity test -0 4 * * 3 bash /home/mokaguys/Documents/apps/DNANexus_Integrity_Test/DNANexus_Integrity_Check.sh > /home/mokaguys/Documents/apps/DNANexus_Integrity_Test/logs/$(date "+\%y\%m\%d_\%H\%M\%S").txt 2>&1 From fde935f1d6a09dcd111041fa95c017549078cc75 Mon Sep 17 00:00:00 2001 From: Aled Jones Date: Wed, 5 Sep 2018 17:03:48 +0100 Subject: [PATCH 4/4] removed mention of integrity check from readme --- README.md | 9 --------- 1 file changed, 9 deletions(-) diff --git a/README.md b/README.md index 6dfad4e5..04d748cf 100644 --- a/README.md +++ b/README.md @@ -41,12 +41,3 @@ This script looks for newly demultiplexed runs, uploads the fastq files, builds #### Alerts Alerts are sent to Moka-Alerts slack channel -# calculate_nextseq_checksums.py -This script is used to display a message box on the sequencers, with the goal of ensuring data is not lost (eg. by setting off another run) should the data integrity check fail. - -This script is run on the sequencers. -The script identifies when a run has started and opens a message box asking for users not to do anything on the sequencer until the integrity check has been performed. -If the script is running on the nextseq checksums are generated by this script and saved into the runfolder in the workstation. -If the script is on a miseq the script waits until the checksum files are present -The message box then changes displaying a message saying the sequencer can or cannot be used based on the checksum results - \ No newline at end of file