From bdee6d1bc94aae63c2751106893c55a55a57008e Mon Sep 17 00:00:00 2001
From: Aledj2 <Aledjones@nhs.net>
Date: Mon, 3 Sep 2018 15:41:05 +0100
Subject: [PATCH 1/4] multiqc_v1.5 to v1.6

---
 automate_demultiplex_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py
index 5a85f39f..6880c989 100644
--- a/automate_demultiplex_config.py
+++ b/automate_demultiplex_config.py
@@ -9,7 +9,7 @@
 debug = False
 
 # =====git release for the automate_demultiplexing repo=====
-script_release = "v20.0"
+script_release = "v21.0"
 
 # =====location of input/output files=====
 # path to run folders
@@ -84,7 +84,7 @@
 # path to paddy app
 peddy_path = "Apps/peddy_v1.2"
 # path to multiqc app
-multiqc_path = "Apps/multiqc_v1.5"
+multiqc_path = "Apps/multiqc_v1.6"
 # smartsheet app
 smartsheet_path = "Apps/smartsheet_mokapipe_complete_v1.1"
 # RPKM path

From 05001b2d9b9b24ff93e72935864f6891f84710f4 Mon Sep 17 00:00:00 2001
From: Aledj2 <Aledjones@nhs.net>
Date: Tue, 4 Sep 2018 14:58:34 +0100
Subject: [PATCH 2/4] Deleted integrity_test.xml and sequencer_checksum.py from repo

---
 integrity_test.xml    | Bin 3752 -> 0 bytes
 sequencer_checksum.py | 328 ------------------------------------------
 2 files changed, 328 deletions(-)
 delete mode 100644 integrity_test.xml
 delete mode 100644 sequencer_checksum.py

diff --git a/integrity_test.xml b/integrity_test.xml
deleted file mode 100644
index 3ce28de8acc40d301f5963739da23c950770166e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3752
zcmbW4S#J|d5QXa*iT@DsezTZMAdxZ328k5b7{P-jWa8jNu_wXfY=0j3PPIFpo*6p^
zS(Bc0SDo5USNHt>)wEZ3YeO5@l3m-0jV!k_yRgh&+7nx|Wm{!jVkNU<=6kGW*7w!V
z*0$I7A5!bcyt0-(wnx@vwhPMnOvHvG2CEs8vUF-=ewVgsEBtb7onmw7NXv|8%#5&~
z^ZY<^*}WMed*dGUn7?6l#Tt0LYX9@L%KM7zjv2=jubxc&T*AMmeZoo~gq-ye5@X&K
z{Q=Jp-a<^04f4A#bHcprtl4vqzro*zZ4kL~vFY|V?U}v6&vkrxhTI1KZD4W5rWnfC
z6no3Auz!lx5%_QM;mG#vqhqhzQ?NVu`JLyAN3{}KsudBnvUqH&*@(#d`~nBcaqkfK
zfVB(;bdgQvQ|XW?S-<2_Mq^g1azR$v9=qjhx6`r@p38lYu(}$fjJXRaM_^oFbBl48
z92M)i=bpEysp3%fl%Yj5%C=%=AC9f*_{!GUw(;!hK6`gF`6KR%Kk;w$au!O}J4M=s
zC8|~rJL2`#B;N8`Rr_eN^LpqAVcYJUukyGo9kMPq-;vR3mDcXrUcX+(3$ccs|B!vj
zF3L&CrhLr{(KO+jxUc?p273ZO(rD{smXPjw?<(%VVt0w`Y})6^em(Gtykl2?#iOvh
z>-KaiU(M4YZ0hr5pxpT`YZda}B@*p8_477T=Zu-xqE4@h{>hnoh?Wor<Vx@A>ElVR
zu0a<&)Gd_Vszwo264e|}GhDx(*60XJIw4f2KAudq0}J0eU$uVeo{5d}x662pZjJ{R
ztmqsOmk&V6m>IG5j_1bX_*;}T%3aWN=g$D-o5JotpzplL-Zra>LOu4(y-N4zzagYv
z#GWRbZ}1~zY!T~!r<q#aBDW#xePSHJ^R`E+lUw~rF{iOErjc5ahoKXG@%uqWuh)qo
zHpZS;*W(}7N$mtaD`x}PqD+b-V`?hKoG)PYUl39s)GM^d<}*J%`Q&lbizi;O|Hlf~
z$rY^!RGr}Yea4GA<s8qGEaGaXNB`!>)g;G}QPZl+hh4nVy-+uiS&W*ek@{0U5sTR>
zucey%u&&2Xy|tW=bS>4ZG9zZBj51F$#r#4()aBLfgH{=<P9nwF<2&Gl?x;IxxC6Ht
zhs+)Ns9T)wWlyPq>bRJEY9pKi+CxH9AAasw)%mC=OS73GmV~qJf-2<PrFY0cxJ%w*
zL0=a0PUq%*6zO>q_%EzsfonV5%sPIO>Gaj@a_Dy>^`s8>b#*$OFY-XQ<OZ{sL>cb%
z8Qo)<Y%0r&s*D~ZmhE7F3mbjvp&BSAoqEBCct6N1Uj!ZZLfuv#hAe->Cw)yF!;ubt
zsltjZbmec(;bH0gymjOg`^mhl>kJ)P5lLSmLn2ayil@%oVoXKOOtT#{i!}^uns4Kw
Iz9U=q2X32LQvd(}

diff --git a/sequencer_checksum.py b/sequencer_checksum.py
deleted file mode 100644
index 56d4b140..00000000
--- a/sequencer_checksum.py
+++ /dev/null
@@ -1,328 +0,0 @@
-from checksumdir import dirhash # package to calculate checksums
-import os # package to use and manipulate file paths
-import datetime # for timestamp
-import time # for sleep
-import Tkinter # to open a window/message box
-import tkMessageBox
-import threading # to run a function in the background
-import automate_demultiplex_config as config
-
-class Nextseq_Integrity_Check():
-	def __init__(self):
-		
-		# temp folder on the nextseq
-		self.nextseqtemp_folder = "D:\\Illumina\\NextSeq Control Software Temp"
-			
-		# path to the mapped workstation share
-		self.mapped_workstation_folder = "Z:\\"
-		
-		# the filename which denote sequencing has finished
-		self.RTA_complete = "RTAComplete.txt"
-		
-		# the file to write the checksums to
-		self.output_file = "md5checksum.txt"
-		
-		# folder containing files which denote checksum is being calculated
-		self.checksum_in_progress = "C:\\Users\\sbsuser\\integrity_check\\checksums_inprogress"
-
-		# folder containing files which denote checksum is being calculated
-		self.run_in_progress = "C:\\Users\\sbsuser\\integrity_check\\run_inprogress"
-		
-		# variable to hold the name of the runfolder
-		self.runfolder = ""
-
-		# variables for the runfolder paths
-		self.workstation_runfolder = ""
-		self.sequencer_runfolder = ""
-
-		# checksums match
-		self.checksum_match = False
-
-		# if testing, overwrite the paths to that of the testing folders (currently on a USB stick)
-		if config.debug:
-			# drive letter given to usb stick
-			self.mapped_drive = "E:\\"
-			# path to the fake nextseqtemp folder
-			self.nextseqtemp_folder = self.mapped_drive + "integrity_testing\\sequencer_temp"
-			# path to the fake workstation folder
-			self.mapped_workstation_folder = self.mapped_drive + "integrity_testing\\workstation"
-			# path to the fake checksums_inprogress folder
-			self.checksum_in_progress = self.mapped_drive + "integrity_testing\\checksums_inprogress"	
-			# path to the fake run in progress folder
-			self.run_in_progress = self.mapped_drive + "integrity_testing\\run_inprogress"
-
-	def look_for_folder(self):
-		"""
-		This script runs every hour.
-		The script needs to detect when a run has started, and display a window which remains until the integrity test has been performed.
-		Display a window to say not to do anything until sequencing is complete and integrity checks done.
-		When checksums are done, display a message box displaying pass/fail messages.
-		"""
-
-		# for each runfolder in temp folder
-		for temp_runfolder in os.listdir(self.nextseqtemp_folder):
-			# if the run has not already been monitored by this script OR it's a testing run
-			if temp_runfolder not in os.listdir(self.run_in_progress) or config.debug:
-				# if testing print message
-				if config.debug:
-					print "testing run skipping test to see if run already being monitored"
-
-			
-				# assign run folder name
-				self.runfolder = temp_runfolder
-				# create a file to denote this run is being monitored
-				with open(os.path.join(self.run_in_progress,temp_runfolder),'w') as new_run_marker:
-					# write timestamp to file
-					new_run_marker.write(str(datetime.datetime.now()))
-				
-				# call function which opens a window to say run in progress - don't do anything until a message box appears denoting integrity check has been performed
-				# this function will close when the run ends and the checksum has been calculated
-				self.open_window()
-
-				# call function to assess result of checksum and display message box
-				# if checksums match (integrity test pass) return a info box
-				if self.checksum_match:
-					# create root window which can then be hidden
-					root = Tkinter.Tk()
-					# hide 
-					root.withdraw()
-					tkMessageBox.showinfo("Integrity check complete","Integrity check passed")
-					# if checksums don't match (integrity test FAIL) return a error box
-				else:
-					# create root window which can then be hidden
-					root = Tkinter.Tk()
-					# hide 
-					root.withdraw()
-					tkMessageBox.showerror("Integrity check complete","Integrity check failed - please do not use this sequencer and inform the Bioinformatics team immediately")
-
-
-	def open_window(self):
-		"""
-		This function uses TKinter to create a window which remains until a process has finished.
-		This process is complete when the run has finished and checksums have been calculated.
-		The window closes and is replaced by the info or error in look_for_folder boxes.
-		"""
-		# create a object for pop up box
-		window = Tkinter.Tk()
-		# set some properties of the message box
-		# message box size
-		window.minsize(width=666,height=66) 
-		# message box title
-		window.title("Integrity check not complete - please wait") 
-		# create a label for inside the message box
-		label = Tkinter.Label(window, text = "Please don't use this sequencer or close this window until a message box stating \"Integrity check passed\" is displayed")
-		# display the label in the window
-		label.pack()
-		# using threading run the function run_has_finished which closes when the checksums have been generated
-		thread = threading.Thread(target = self.run_has_finished)
-		# start parallel computation
-		thread.start() 
-		# montior this thread
-		while thread.is_alive():
-			# update the window
-			window.update()
-			time.sleep(5)
-		#close this window then all checksums are present.
-		window.destroy()
-
-
-	def run_has_finished(self):
-		"""
-		This function looks at the runfolder, assesses if the run has finished and the data transferred.
-		If required the checksums are generated, or if not the script waits until the checksums have been generated (by the demultiplexing script).
-		"""
-		# build path to the runfolder
-		self.sequencer_runfolder = os.path.join(self.nextseqtemp_folder, self.runfolder)
-		# build paths on the workstation
-		self.workstation_runfolder = os.path.join(self.mapped_workstation_folder, self.runfolder)
-		#flag to denote run and data transfer has finished
-		finished = False
-		# while variable finished is false
-		while not finished:
-			# check the run has finished and transferred (presence of RTA_complete in the runfolder and on workstation)
-			if self.RTA_complete in os.listdir(self.sequencer_runfolder) and self.RTA_complete in os.listdir(self.workstation_runfolder):
-					# if it's a testing run print a message
-					if config.debug:
-						print "run finished - skipping integrity_check_first_wait"
-					else:
-						# wait the number of hours defined in config file to ensure all file transfers are done
-						time.sleep(config.integrity_check_first_wait * 3600)
-						
-					# call function which triggers the checksum calculations
-					self.prepare_checksum_calculations()
-					# now all checksums are done change flag to true so the loop finishes and the window is closed
-					finished = True
-			
-			# if run has not finished 
-			else:
-				# if a testing run, wait 20 seconds and print a message
-				if config.debug:
-					print "waiting 20 seconds for sequencing and data transfer to finish"
-					time.sleep(20)
-				# if not testing wait longer
-				else:
-					# wait 10 minutes
-					time.sleep(600)
-		if config.debug:
-			print "checksums done"
-			
-
-	def prepare_checksum_calculations(self):
-		"""
-		The checksums are calculated by this script.
-		This function checks the runfolder has not already been checksummed, marks the folder as being checksummed and then calls the function to generate the checksums.
-		"""
-		if config.debug:
-			print "in prepare_checksum_calculations"
-		# create name for file to denote checksum in progress
-		checksum_in_progress_file=self.runfolder+".txt"
-		# check integrity check has not already been calculated, or isn't currently being calculated and it isn't a testing run.
-		if not config.debug and self.output_file not in os.listdir(self.workstation_runfolder) and checksum_in_progress_file not in os.listdir(self.checksum_in_progress):
-			# create a file to denote checksum in progress
-			with open(os.path.join(self.checksum_in_progress,checksum_in_progress_file),'w') as checksum_in_progress_file_path:
-				# create a timestamp
-				now=datetime.datetime.now()
-				# convert timestamp to string and write to file.
-				checksum_in_progress_file_path.write(str(now))
-
-			# call function to generate checksum for workstation and sequencer runfolders
-			self.run_integrity_check()
-		# if a test run print statement to explain stopping
-		elif config.debug:
-			print "checksums already generated but as testing continuing anyway"
-				# create a file to denote checksum in progress
-			with open(os.path.join(self.checksum_in_progress,checksum_in_progress_file),'w') as checksum_in_progress_file_path:
-				# create a timestamp
-				now=datetime.datetime.now()
-				# convert timestamp to string and write to file.
-				checksum_in_progress_file_path.write(str(now))
-
-			# call function to generate checksum for workstation and sequencer runfolders
-			self.run_integrity_check()
-						
-
-	def run_integrity_check(self):
-		"""
-		This function calculates the checksums.
-		If the checksums do not match it repeats the test until it passes or until the maximum number of attempts is reached
-		It looks for the presense of any files which should be ignored as they are not copied from temp to output.
-		The checksums are written to a file on the workstation for the demultiplexing script.
-		"""
-		if config.debug:
-			print "starting integrity checking"
-
-		# set a count for max number of attempts at checksum (one test per hour)
-		count = 0
-
-		# while the integrity test is failing and not exceeded the max number of attempts
-		while not self.checksum_match and count < config.max_number_of_attempts:		
-			# calculate the md5 checksum, using the to_exclude list
-			workstation_checksum = dirhash(self.workstation_runfolder, 'md5', excluded_files = config.exclude) 
-			sequencer_checksum = dirhash(self.sequencer_runfolder, 'md5', excluded_files = config.exclude)
-			
-			# if testing print checksums
-			if config.debug:
-				print "workstation checksum = " + workstation_checksum
-				print "sequencer checksum = " + sequencer_checksum	   
-
-			# see if the checksums match
-			if workstation_checksum == sequencer_checksum:
-				# if they do set self.checksum_match to exit the while loop
-				self.checksum_match = True
-				# increase count
-				count += 1
-			
-			# if checksums fail 
-			else:
-				# increase count
-				count += 1
-				
-				# if testing skip the wait
-				if config.debug:
-					print "waiting 15 seconds... change the runfolder now!"
-					time.sleep(15)
-				else:
-					# wait the number of hours defined in config file
-					time.sleep(config.integrity_check_repeat_wait * 3600)
-		
-		# report if integrity test has passed or failed after max number of tries
-		# if failed
-		if not self.checksum_match:
-			# write the checksums to the output file (on workstation)
-			with open(os.path.join(self.workstation_runfolder, self.output_file), 'w') as outputfile:
-				# record that it failed, with the number of hours
-				outputfile.write("Checksums do not match after " + str(config.max_number_of_attempts) + " hours\n")
-				# record the checksums
-				outputfile.write("workstation checksum (" + self.workstation_runfolder + ")=" + workstation_checksum + "\n")
-				outputfile.write("sequencer checksum (" + self.sequencer_runfolder + ")=" + sequencer_checksum + "\n")
-				# call function to identify any files which differ between output and temp
-				self.identify_missing_files()
-
-		# if test passed
-		else:
-			# write the checksums to the output file (on workstation)
-			with open(os.path.join(self.workstation_runfolder, self.output_file), 'w') as outputfile:
-				# record that it passed with the number of hours it took
-				outputfile.write(config.checksum_match +" after "+ str(count) + " hours\n")
-				# record checksums
-				outputfile.write("workstation checksum (" + self.workstation_runfolder + ")=" + workstation_checksum + "\n")
-				outputfile.write("sequencer checksum (" + self.sequencer_runfolder + ")=" + sequencer_checksum + "\n")
-
-
-	def identify_missing_files(self):
-		"""
-		Loop through the temp folder and if there are any files NOT on the workstation identify them
-		repeat - looking for any files on workstation that aren't on the sequencer
-		"""
-		#create output file
-		with open(os.path.join(self.workstation_runfolder, config.missing_files_output), 'w') as outputfile:
-
-      # set flag so header only reported first time
-			workstation_missing = False
-			# loop through the tempfolder
-			for root, subfolder, files in os.walk(os.path.join(self.nextseqtemp_folder, self.runfolder)):
-				# for each file in the list of files in that folder
-				for file in files:
-					# set the path of each file
-					path = os.path.join(root,file)
-					# create the equivelant path on the workstation
-					ws_path = path.replace(self.nextseqtemp_folder,self.mapped_workstation_folder)
-					# if the file doesn't exist and it's not a file already identified as not expected on both folders
-					if not os.path.isfile(ws_path) and file not in config.exclude:
-						# if it's the first missing file we've seen 
-						if not workstation_missing:
-							# print header message
-							outputfile.write("Missing from Workstation\n")
-							# set flag so not printed again
-							workstation_missing = True
-						# print the path to the extra file
-						outputfile.write(path)
-      
-      		# repeat looking for files on workstation that aren't on sequencer
-			sequencer_missing = False
-			# loop through all files on workstation runfolder 
-			for root, subfolder, files in os.walk(os.path.join(self.mapped_workstation_folder,self.runfolder)):
-				# for each file
-				for file in files:
-					# set path on workstation
-					path = os.path.join(root,file)
-					# replace the path on workstation with the expected sequencer path
-					sequencer_file_path = path.replace(self.mapped_workstation_folder, self.nextseqtemp_folder)
-					# if this file doesn't exist
-					if not os.path.isfile(sequencer_file_path) and file not in config.exclude:
-						# check if header not already printed
-						if not sequencer_missing:
-							# print header
-							outputfile.write("missing from Nextseq")
-							# set flag so not printed again
-							sequencer_missing = True
-						# print the path to the extra file
-						outputfile.write(path)
-              
-
-def main():
-	md5=Nextseq_Integrity_Check()
-	md5.look_for_folder()
-	
-if __name__ =="__main__":
-	main()
\ No newline at end of file

From 8050090743850e6ec84e6ab0efb9fbbf2b7c41aa Mon Sep 17 00:00:00 2001
From: Aled Jones <aledjones@nhs.net>
Date: Wed, 5 Sep 2018 17:03:00 +0100
Subject: [PATCH 3/4] Delete crontab.txt

moved to separate repo
---
 crontab.txt | 32 --------------------------------
 1 file changed, 32 deletions(-)
 delete mode 100644 crontab.txt

diff --git a/crontab.txt b/crontab.txt
deleted file mode 100644
index 200824c1..00000000
--- a/crontab.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-# Edit this file to introduce tasks to be run by cron.
-# 
-# Each task to run has to be defined through a single line
-# indicating with different fields when the task will be run
-# and what command to run for the task
-# 
-# To define the time you can provide concrete values for
-# minute (m), hour (h), day of month (dom), month (mon),
-# and day of week (dow) or use '*' in these fields (for 'any').# 
-# Notice that tasks will be started based on the cron's system
-# daemon's notion of time and timezones.
-# 
-# Output of the crontab jobs (including errors) is sent through
-# email to the user the crontab file belongs to (unless redirected).
-# 
-# For example, you can run a backup of all your user accounts
-# at 5 a.m every week with:
-# 0 5 * * 1 tar -zcf /var/backups/home.tgz /home/
-# 
-# For more information see the manual pages of crontab(5) and cron(8)
-# 
-# m h  dom mon dow   command
-# Demultiplexing
-0 * * * * python /home/mokaguys/Documents/apps/automate_demultiplex/demultiplex.py > /home/mokaguys/Documents/automate_demultiplexing_logfiles/Demultiplexing_stdout/$(date "+\%Y\%m\%d_\%H\%M\%S").txt 2>&1
-# Upload Agent
-5 * * * * python /home/mokaguys/Documents/apps/automate_demultiplex/DNANexus_upload_agent.py > /home/mokaguys/Documents/automate_demultiplexing_logfiles/Upload_agent_stdout/$(date "+\%Y\%m\%d_\%H\%M\%S").txt 2>&1
-# Workstation heartbeat
-*/20 * * * * echo 'padam padam' 2>&1 | /usr/bin/logger -t Heartbeat
-# Low space on workstation warning
-0 * * * * FREE=$(df /media/data1 --output=avail | tail -n 1 );FREEHR=$(df /media/data1 --output=avail -h | tail -n 1); if [ $FREE -lt 838860371 ]; then echo "Less than 800GB on data1 ("$FREEHR"B)"; fi | /usr/bin/logger -t data1_freespace
-# DNANexus platform integrity test
-0 4 * * 3 bash /home/mokaguys/Documents/apps/DNANexus_Integrity_Test/DNANexus_Integrity_Check.sh > /home/mokaguys/Documents/apps/DNANexus_Integrity_Test/logs/$(date "+\%y\%m\%d_\%H\%M\%S").txt 2>&1

From fde935f1d6a09dcd111041fa95c017549078cc75 Mon Sep 17 00:00:00 2001
From: Aled Jones <aledjones@nhs.net>
Date: Wed, 5 Sep 2018 17:03:48 +0100
Subject: [PATCH 4/4] removed mention of integrity check from readme

---
 README.md | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/README.md b/README.md
index 6dfad4e5..04d748cf 100644
--- a/README.md
+++ b/README.md
@@ -41,12 +41,3 @@ This script looks for newly demultiplexed runs, uploads the fastq files, builds
 #### Alerts
 Alerts are sent to Moka-Alerts slack channel
 
-# calculate_nextseq_checksums.py
-This script is used to display a message box on the sequencers, with the goal of ensuring data is not lost (eg. by setting off another run) should the data integrity check fail. 
-
-This script is run on the sequencers. 
-The script identifies when a run has started and opens a message box asking for users not to do anything on the sequencer until the integrity check has been performed.
-If the script is running on the nextseq checksums are generated by this script and saved into the runfolder in the workstation. 
-If the script is on a miseq the script waits until the checksum files are present 
-The message box then changes displaying a message saying the sequencer can or cannot be used based on the checksum results
- 
\ No newline at end of file