From 9ac900c5d6ea9fcde91fa15565a8a912381bc874 Mon Sep 17 00:00:00 2001 From: mokaguys Date: Thu, 20 Jul 2023 14:37:58 +0100 Subject: [PATCH 01/16] add conda env in gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 062556ec..5900edc1 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ test/data .vscode share/ make_panel_fastqs/ -test.log \ No newline at end of file +test.log +venv/ From 753ab116378aacb42d26214f44bed267ec132d49 Mon Sep 17 00:00:00 2001 From: mokaguys Date: Thu, 20 Jul 2023 14:58:29 +0100 Subject: [PATCH 02/16] set testing to true --- automate_demultiplex_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index ac273046..d8cffc98 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -8,7 +8,7 @@ import os # Set debug mode -testing = False +testing = True # =====location of input/output files===== # root of folder that contains the apps, automate_demultiplexing_logfiles and From 341562d4dd225f6b270dd851acc50a36714be8f8 Mon Sep 17 00:00:00 2001 From: mokaguys Date: Thu, 20 Jul 2023 15:04:19 +0100 Subject: [PATCH 03/16] update panel list --- automate_demultiplex_config.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index d8cffc98..c0a0e8a3 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -485,6 +485,8 @@ "Pan4964", # VCP2 Viapath R259 (nijmegen) "Pan4130", # VCP2 Viapath R211 (polyposis) "Pan5121", # VCP2 Viapath R430 (prostate) + "Pan5143", # VCP2 Viapath R444.1 Breast cancer (PARP treatment) + "Pan5147", # VCP2 Viapath R444.2 Prostate cancer (PARP treatment) "Pan4132", # VCP3 Viapath R56 "Pan4134", # VCP3 Viapath R57 "Pan4136", # VCP3 Viapath R58 @@ -508,6 +510,8 @@ "Pan4819", # VCP2 STG R210 lynch "Pan4820", # VCP2 STG R211 polyposis "Pan5122", # VCP2 STG R430 prostate + "Pan5144", # VCP2 STG R444.1 Breast cancer (PARP treatment) + "Pan5148", # VCP2 STG R444.2 Prostate cancer (PARP treatment) "Pan4826", # VCP3 STG R56 "Pan4827", # VCP3 STG R57 "Pan4828", # VCP3 STG R58 From efb9bee7d94d09726ec11716fabcd4efb5eebb6d Mon Sep 17 00:00:00 2001 From: mokaguys Date: Thu, 20 Jul 2023 15:20:42 +0100 Subject: [PATCH 04/16] add panel settings --- automate_demultiplex_config.py | 53 +++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index c0a0e8a3..5ec4724a 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -578,6 +578,10 @@ "Pan4964", "Pan5121", "Pan5122", + "Pan5143", + "Pan5144", + "Pan5147", + "Pan5148" ] vcp3_panel_list = [ "Pan4132", @@ -746,6 +750,30 @@ "variant_calling_bedfile": "Pan5119data.bed", "sambamba_bedfile": "Pan5123dataSambamba.bed", }, + "Pan5144": { # VCP2 R444.1 Breast cancer (PARP treatment- STG) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan5109", + "RPKM_also_analyse": vcp2_panel_list, + "congenica_credentials": "STG", + "congenica_IR_template": "non-priority", #TO DO check if priority is enabled + "congenica_project": #waiting on monogenics, + "hsmetrics_bedfile": "Pan5123data.bed", + "variant_calling_bedfile": "Pan5119data.bed", + "sambamba_bedfile": "Pan5123dataSambamba.bed", + }, + "Pan5148": { # VCP2 R444.2 Prostate cancer (PARP treatment- STG) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan5109", + "RPKM_also_analyse": vcp2_panel_list, + "congenica_credentials": "STG", + "congenica_IR_template": "non-priority", # TO DO check if priority is enabled + "congenica_project": #waiting on monogenics, + "hsmetrics_bedfile": "Pan5123data.bed", + "variant_calling_bedfile": "Pan5119data.bed", + "sambamba_bedfile": "Pan5123dataSambamba.bed", + }, "Pan4009": { # MokaSNP "mokasnp": True, "multiqc_coverage_level": 30, @@ -1010,6 +1038,28 @@ "variant_calling_bedfile": "Pan5119data.bed", "polyedge": "MSH2", }, + "Pan5143": { # VCP2 R444.1 Breast cancer (PARP treatment- Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan5109", + "congenica_project": "12814", + "RPKM_also_analyse": vcp2_panel_list, + "hsmetrics_bedfile": "Pan5123data.bed", + "sambamba_bedfile": "Pan5123dataSambamba.bed", + "variant_calling_bedfile": "Pan5119data.bed", + "polyedge": "MSH2", + }, + "Pan5147": { # VCP2 R444.2 Prostate cancer (PARP treatment- Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan5109", + "congenica_project": "12814", + "RPKM_also_analyse": vcp2_panel_list, + "hsmetrics_bedfile": "Pan5123data.bed", + "sambamba_bedfile": "Pan5123dataSambamba.bed", + "variant_calling_bedfile": "Pan5119data.bed", + "polyedge": "MSH2", + }, "Pan4132": { # VCP3 R56 (Viapath) "mokapipe": True, "multiqc_coverage_level": 30, @@ -1678,7 +1728,8 @@ "-istg_pannumbers=Pan4042,Pan4043,Pan4044,Pan4049,Pan4821,Pan4822," "Pan4823,Pan4824,Pan4825,Pan4816,Pan4817,Pan4818,Pan4819,Pan4820," "Pan4826,Pan4827,Pan4828,Pan4829,Pan4830,Pan4831,Pan4832,Pan4833," - "Pan4834,Pan4835,Pan4836,Pan5008,Pan5010,Pan5012,Pan5014,Pan5122" + "Pan4834,Pan4835,Pan4836,Pan5008,Pan5010,Pan5012,Pan5014,Pan5122," + "Pan5144,Pan5148" ), "cp_capture_pannos": "-icp_capture_pannos=Pan5109,Pan4399,Pan4362", } From 3a569d5a140bdc3f9d141e3d5ea42b14589d08ad Mon Sep 17 00:00:00 2001 From: mokaguys Date: Thu, 20 Jul 2023 15:25:34 +0100 Subject: [PATCH 05/16] update multiqc version --- automate_demultiplex_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index 5ec4724a..a505695f 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -164,7 +164,7 @@ # path to paddy app peddy_path = "Apps/peddy_v1.5" # path to multiqc app -multiqc_path = "Apps/multiqc_v1.17.0" +multiqc_path = "Apps/multiqc_v1.18.0" # path to congenica upload app congenica_app_path = "Apps/congenica_upload_v1.3.2" congenica_SFTP_upload_app = "applet-GFfJpj80jy1x1Bz1P1Bk3vQf" From 0189ece4081b408230f1da8a0a53054245612d7e Mon Sep 17 00:00:00 2001 From: mokaguys Date: Thu, 20 Jul 2023 15:32:59 +0100 Subject: [PATCH 06/16] update TSO coverage BED file, close #498 --- automate_demultiplex_config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index a505695f..a5d8ac60 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -1228,7 +1228,7 @@ }, "Pan4969": { # TSO500 no UTRs. TERT promoter "TSO500": True, - "sambamba_bedfile": "Pan4969dataSambamba.bed", # NOTE All TSO500 output parser settings are currently taken from the first pan number listed in tso500_panel_list + "sambamba_bedfile": "Pan5130dataSambamba.bed", # NOTE All TSO500 output parser settings are currently taken from the first pan number listed in tso500_panel_list "clinical_coverage_depth": 100, "multiqc_coverage_level": 100, "coverage_min_basecall_qual": 25, @@ -1237,7 +1237,7 @@ "Pan5085": { # TSO500 High throughput Synnovis. no UTRs. TERT promoter "TSO500": True, "TSO500_high_throughput": True, - "sambamba_bedfile": "Pan4969dataSambamba.bed", # NOTE All TSO500 output parser settings are currently taken from the first pan number listed in tso500_panel_list + "sambamba_bedfile": "Pan5130dataSambamba.bed", # NOTE All TSO500 output parser settings are currently taken from the first pan number listed in tso500_panel_list "clinical_coverage_depth": 100, "multiqc_coverage_level": 100, "coverage_min_basecall_qual": 25, @@ -1246,7 +1246,7 @@ "Pan5112": { # TSO500 High throughput BSPS. no UTRs. TERT promoter "TSO500": True, "TSO500_high_throughput": True, - "sambamba_bedfile": "Pan4969dataSambamba.bed", # NOTE All TSO500 output parser settings are currently taken from the first pan number listed in tso500_panel_list + "sambamba_bedfile": "Pan5130dataSambamba.bed", # NOTE All TSO500 output parser settings are currently taken from the first pan number listed in tso500_panel_list "clinical_coverage_depth": 100, "multiqc_coverage_level": 100, "coverage_min_basecall_qual": 25, @@ -1256,7 +1256,7 @@ "Pan5114": { # TSO500 High throughput Control. no UTRs. TERT promoter "TSO500": True, "TSO500_high_throughput": True, - "sambamba_bedfile": "Pan4969dataSambamba.bed", # NOTE All TSO500 output parser settings are currently taken from the first pan number listed in tso500_panel_list + "sambamba_bedfile": "Pan5130dataSambamba.bed", # NOTE All TSO500 output parser settings are currently taken from the first pan number listed in tso500_panel_list "clinical_coverage_depth": 100, "multiqc_coverage_level": 100, "coverage_min_basecall_qual": 25, From d8b9acc249fe554db27410d12b08c16ad987b2d4 Mon Sep 17 00:00:00 2001 From: mokaguys Date: Thu, 20 Jul 2023 17:24:49 +0100 Subject: [PATCH 07/16] fix tso coverage grouping, close #494 --- automate_demultiplex_config.py | 4 +--- upload_and_setoff_workflows.py | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index a5d8ac60..ece85b7a 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -756,7 +756,7 @@ "RPKM_bedfile_pan_number": "Pan5109", "RPKM_also_analyse": vcp2_panel_list, "congenica_credentials": "STG", - "congenica_IR_template": "non-priority", #TO DO check if priority is enabled + "congenica_IR_template": "non-priority", #TODO check if priority is enabled "congenica_project": #waiting on monogenics, "hsmetrics_bedfile": "Pan5123data.bed", "variant_calling_bedfile": "Pan5119data.bed", @@ -1047,7 +1047,6 @@ "hsmetrics_bedfile": "Pan5123data.bed", "sambamba_bedfile": "Pan5123dataSambamba.bed", "variant_calling_bedfile": "Pan5119data.bed", - "polyedge": "MSH2", }, "Pan5147": { # VCP2 R444.2 Prostate cancer (PARP treatment- Viapath) "mokapipe": True, @@ -1058,7 +1057,6 @@ "hsmetrics_bedfile": "Pan5123data.bed", "sambamba_bedfile": "Pan5123dataSambamba.bed", "variant_calling_bedfile": "Pan5119data.bed", - "polyedge": "MSH2", }, "Pan4132": { # VCP3 R56 (Viapath) "mokapipe": True, diff --git a/upload_and_setoff_workflows.py b/upload_and_setoff_workflows.py index 5b476585..ad6701a0 100644 --- a/upload_and_setoff_workflows.py +++ b/upload_and_setoff_workflows.py @@ -1534,6 +1534,7 @@ def start_building_dx_run_cmds(self): if TSO500: for sample in self.list_of_processed_samples: + pannumber = re.search(r"Pan\d+", sample).group() commands_list.append( self.create_sambamba_cmd(sample, pannumber) ) From 07d21e23b1896f9bb825b8514f6d2bac141e2c9b Mon Sep 17 00:00:00 2001 From: mokaguys Date: Thu, 27 Jul 2023 11:07:38 +0100 Subject: [PATCH 08/16] Switch to dockerised bcl2fastq2 --- automate_demultiplex_config.py | 21 +++++++++++++-------- demultiplex.py | 21 ++++++++++++--------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index ece85b7a..3a5a9b7e 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -33,8 +33,13 @@ # path to fastq files fastq_folder = "/Data/Intensities/BaseCalls" -# path to bcl2fastq -bcl2fastq = "/usr/local/bcl2fastq2-v2.20.0.422/bin/bcl2fastq" +# bcl2fastq base command +bcl2fastq_test_cmd = "sudo docker run --rm seglh/bcl2fastq2:v2.20.0.422_25dd0c0" +bcl2fastq_cmd = ( + "sudo docker run --rm -v %s:/mnt/run -v %s:/mnt/run/%s " + "seglh/bcl2fastq2:v2.20.0.422_25dd0c0 -R /mnt/run --sample-sheet /mnt/run/%s " + "--no-lane-splitting >> %s 2>&1" +) # files for checking NGS runfolders before demultiplexing file_complete_run = "RTAComplete.txt" @@ -756,8 +761,8 @@ "RPKM_bedfile_pan_number": "Pan5109", "RPKM_also_analyse": vcp2_panel_list, "congenica_credentials": "STG", - "congenica_IR_template": "non-priority", #TODO check if priority is enabled - "congenica_project": #waiting on monogenics, + "congenica_IR_template": "non-priority", #TODO check if priority is enabled + "congenica_project": "",#waiting on monogenics, "hsmetrics_bedfile": "Pan5123data.bed", "variant_calling_bedfile": "Pan5119data.bed", "sambamba_bedfile": "Pan5123dataSambamba.bed", @@ -768,8 +773,8 @@ "RPKM_bedfile_pan_number": "Pan5109", "RPKM_also_analyse": vcp2_panel_list, "congenica_credentials": "STG", - "congenica_IR_template": "non-priority", # TO DO check if priority is enabled - "congenica_project": #waiting on monogenics, + "congenica_IR_template": "non-priority", # TO DO check if priority is enabled + "congenica_project": "",#waiting on monogenics, "hsmetrics_bedfile": "Pan5123data.bed", "variant_calling_bedfile": "Pan5119data.bed", "sambamba_bedfile": "Pan5123dataSambamba.bed", @@ -1038,7 +1043,7 @@ "variant_calling_bedfile": "Pan5119data.bed", "polyedge": "MSH2", }, - "Pan5143": { # VCP2 R444.1 Breast cancer (PARP treatment- Viapath) + "Pan5143": { # VCP2 R444.1 Breast cancer (PARP treatment- Viapath) "mokapipe": True, "multiqc_coverage_level": 30, "RPKM_bedfile_pan_number": "Pan5109", @@ -1048,7 +1053,7 @@ "sambamba_bedfile": "Pan5123dataSambamba.bed", "variant_calling_bedfile": "Pan5119data.bed", }, - "Pan5147": { # VCP2 R444.2 Prostate cancer (PARP treatment- Viapath) + "Pan5147": { # VCP2 R444.2 Prostate cancer (PARP treatment- Viapath) "mokapipe": True, "multiqc_coverage_level": 30, "RPKM_bedfile_pan_number": "Pan5109", diff --git a/demultiplex.py b/demultiplex.py index b5265cf0..4eaabe9c 100644 --- a/demultiplex.py +++ b/demultiplex.py @@ -150,7 +150,8 @@ def __init__(self, now): self.processed_runfolders = [] # Path to bcl2fastq - self.bcl2fastq = config.bcl2fastq + self.bcl2fastq_test = config.bcl2fastq_test_cmd + self.bcl2fastq_cmd = config.bcl2fastq_cmd # Set script log file path and name for this hour's cron job (script log file). self.script_logfile_path = config.demultiplex_logfiles @@ -336,8 +337,7 @@ def check_for_TSO500(self): for pannum in config.tso500_panel_list: if pannum in line: return True - return False - + return False def run_demultiplexing(self): """Run bcl2fastq using runfolder as input. Create demultiplex log file in runfolder.""" @@ -368,10 +368,13 @@ def run_demultiplexing(self): # --no-lane-splitting >> # /media/data1/share/1111_M02353_NMNOV17_ONCTEST/bcl2fastq2_output.log 2&>1" # where --no-lane-splitting creates a single fastq for a sample, not into one fastq per lane - command = (self.bcl2fastq + " -R " + self.runfolders + "/" + self.runfolder + - " --sample-sheet " + self.samplesheet_path + " --no-lane-splitting >> " + - demultiplex_log + " 2>&1") - + command = ( + self.bcl2fastq_cmd % ( + os.path.join(self.runfolders, self.runfolder), + self.samplesheet_path, self.samplesheet, self.samplesheet, + demultiplex_log + ) + ) # Write progress/status to script log file self.script_logfile.write("running bcl2fastq. command = " + command + "\n") # Add entry to system log @@ -453,8 +456,8 @@ def send_an_email(self): def test_bcl2fastq(self): """Raise exception if bcl2fastq is not installed.""" - # call the path to bcl2fastq2 using subprocess to capture the stderr and stdout. NB the required text is in stderr not stdout - proc = subprocess.Popen([self.bcl2fastq], stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True) + # Run bcl2fastq docker image using subprocess to capture the stderr and stdout. NB the required text is in stderr not stdout + proc = subprocess.Popen([self.bcl2fastq_test], stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True) # Capture the streams (out, err) = proc.communicate() From 70e49a9d5cbef42de63f86f77f8234c2873c1046 Mon Sep 17 00:00:00 2001 From: mokaguys Date: Thu, 27 Jul 2023 12:09:09 +0100 Subject: [PATCH 09/16] Fix send_an_email() function with correct email server username, close #499 --- demultiplex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demultiplex.py b/demultiplex.py index 4eaabe9c..67b4d001 100644 --- a/demultiplex.py +++ b/demultiplex.py @@ -443,7 +443,7 @@ def send_an_email(self): # Identify client to ESMTP server using EHLO commands server.ehlo() # Login to server with user credentials - server.login("abc", self.pw) + server.login(self.user, self.pw) # Send email to server. Message is a call to email.Message.as_string() server.sendmail(self.me, [self.you], m.as_string()) # Write to script log file From 36783b47dd8d346c57112c8f3d8e4c96aab778f0 Mon Sep 17 00:00:00 2001 From: mokaguys Date: Thu, 27 Jul 2023 12:30:13 +0100 Subject: [PATCH 10/16] Update incorrect RPKM VCP3 pan number (Pan3974 changed to Pan4362), close #495 --- automate_demultiplex_config.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index 3a5a9b7e..0e05c6cc 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -799,7 +799,7 @@ "Pan4043": { # VCP3 STG "mokapipe": True, "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_bedfile_pan_number": "Pan4362", "RPKM_also_analyse": vcp3_panel_list, "congenica_credentials": "STG", "congenica_IR_template": "non-priority", @@ -1335,7 +1335,7 @@ "Pan4826": { # VCP3 STG R56 "mokapipe": True, "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_bedfile_pan_number": "Pan4362", "RPKM_also_analyse": vcp3_panel_list, "congenica_credentials": "STG", "congenica_IR_template": "non-priority", @@ -1347,7 +1347,7 @@ "Pan4827": { # VCP3 STG R57 "mokapipe": True, "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_bedfile_pan_number": "Pan4362", "RPKM_also_analyse": vcp3_panel_list, "congenica_credentials": "STG", "congenica_IR_template": "non-priority", @@ -1359,7 +1359,7 @@ "Pan4828": { # VCP3 STG R58 "mokapipe": True, "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_bedfile_pan_number": "Pan4362", "RPKM_also_analyse": vcp3_panel_list, "congenica_credentials": "STG", "congenica_IR_template": "non-priority", @@ -1371,7 +1371,7 @@ "Pan4829": { # VCP3 STG R60 "mokapipe": True, "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_bedfile_pan_number": "Pan4362", "RPKM_also_analyse": vcp3_panel_list, "congenica_credentials": "STG", "congenica_IR_template": "non-priority", @@ -1383,7 +1383,7 @@ "Pan4830": { # VCP3 STG R62 "mokapipe": True, "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_bedfile_pan_number": "Pan4362", "RPKM_also_analyse": vcp3_panel_list, "congenica_credentials": "STG", "congenica_IR_template": "non-priority", @@ -1395,7 +1395,7 @@ "Pan4831": { # VCP3 STG R66 "mokapipe": True, "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_bedfile_pan_number": "Pan4362", "RPKM_also_analyse": vcp3_panel_list, "congenica_credentials": "STG", "congenica_IR_template": "non-priority", @@ -1407,7 +1407,7 @@ "Pan4832": { # VCP3 STG R78 "mokapipe": True, "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_bedfile_pan_number": "Pan4362", "RPKM_also_analyse": vcp3_panel_list, "congenica_credentials": "STG", "congenica_IR_template": "non-priority", @@ -1419,7 +1419,7 @@ "Pan4833": { # VCP3 STG R79 "mokapipe": True, "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_bedfile_pan_number": "Pan4362", "RPKM_also_analyse": vcp3_panel_list, "congenica_credentials": "STG", "congenica_IR_template": "non-priority", @@ -1431,7 +1431,7 @@ "Pan4834": { # VCP3 STG R81 "mokapipe": True, "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_bedfile_pan_number": "Pan4362", "RPKM_also_analyse": vcp3_panel_list, "congenica_credentials": "STG", "congenica_IR_template": "non-priority", @@ -1443,7 +1443,7 @@ "Pan4835": { # VCP3 STG R82 "mokapipe": True, "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_bedfile_pan_number": "Pan4362", "RPKM_also_analyse": vcp3_panel_list, "congenica_credentials": "STG", "congenica_IR_template": "non-priority", @@ -1455,7 +1455,7 @@ "Pan4836": { # VCP3 STG R229 "mokapipe": True, "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_bedfile_pan_number": "Pan4362", "RPKM_also_analyse": vcp3_panel_list, "congenica_credentials": "STG", "congenica_IR_template": "non-priority", From d0e3b5c726be777d5905b12cc71db26258ea463f Mon Sep 17 00:00:00 2001 From: mokaguys Date: Thu, 27 Jul 2023 12:40:37 +0100 Subject: [PATCH 11/16] Remove MokaCAN as obsolete, close #482 --- automate_demultiplex_config.py | 39 ------------------ upload_and_setoff_workflows.py | 73 ---------------------------------- 2 files changed, 112 deletions(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index 0e05c6cc..bdceb4a9 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -133,8 +133,6 @@ archerDx_pipeline_ID = "5238" # MokaSNP ID mokasnp_pipeline_ID = "5091" -# mokacan pipeline ID -mokacan_pipeline_ID = "4728" # TSO500 pipeline ID TSO_pipeline_ID = "5237" @@ -162,8 +160,6 @@ # path to mokaamp mokaamp_path = "Workflows/MokaAMP_v2.2" -# path to mokacan -mokacan_path = "Workflows/MokaCAN_v1.0" # path to mokasnp mokasnp_path = "Workflows/MokaSNP_v1.2.0" # path to paddy app @@ -332,37 +328,6 @@ mokaamp_vardict_reference_stage = " -istage-G0vKZk80GfYkQx86PJGGjz9Y.ref_genome=project-ByfFPz00jy1fk6PjpZ95F27J:file-ByYgX700b80gf4ZY1GxvF3Jv" mokaamp_varscan_reference_stage = " -istage-FPzGjp80jy1V3Jvb5z6xfpfZ.ref_genome=project-ByfFPz00jy1fk6PjpZ95F27J:file-ByYgX700b80gf4ZY1GxvF3Jv" -# MokaCAN - stages which may change between samples -mokacan_fastqc_r1_stage = " -istage-FPzGj6Q0jy1fF6505zFP6zz5.reads=" -mokacan_fastqc_r2_stage = " -istage-FPzGj5j0jy1x97jg607Fg229.reads=" -mokacan_picard_bedfile_stage = ( - " -istage-FPzGjV80jy1x97jg607Fg22b.vendor_exome_bedfile=" -) -mokacan_picard_capturetype_stage = ( - " -istage-FPzGjV80jy1x97jg607Fg22b.Capture_panel=" -) -mokacan_sambamba_bedfile_stage = ( - " -istage-FPzGjfQ0jy1y01vG60K22qG1.sambamba_bed=" -) -mokacan_vardict_bedfile_stage = " -istage-FPzGjgj0jy1Q2JJF2zYx5J5k.bedfile=" -mokacan_sentieon_sample_name_stage = ( - " -istage-FgYgB2Q087fjzvxy9f4q1K8X.sample=" -) -mokacan_sambamba_coverage_level_stage = ( - " -istage-FPzGjfQ0jy1y01vG60K22qG1.coverage_level=" -) -mokacan_vardict_sample_name_stage = ( - " -istage-FPzGjgj0jy1Q2JJF2zYx5J5k.sample_name=vardict_" -) -mokacan_varscan_bedfile_stage = " -istage-FPzGjp80jy1V3Jvb5z6xfpfZ.bed_file=" - -# mokacan stages with inputs that shouldn't change - these are specified to ensure any input files are taken from 001 -mokacan_senteion_bwa_reference_stage = " -istage-FgYgB2Q087fjzvxy9f4q1K8X.genomebwaindex_targz=project-ByfFPz00jy1fk6PjpZ95F27J:file-B6ZY4942J35xX095VZyQBk0v" -mokacan_senteion_reference_stage = " -istage-FgYgB2Q087fjzvxy9f4q1K8X.genome_fastagz=project-ByfFPz00jy1fk6PjpZ95F27J:file-B6ZY7VG2J35Vfvpkj8y0KZ01" -mokacan_picard_reference_stage = " -istage-FPzGjV80jy1x97jg607Fg22b.fasta_index=project-ByfFPz00jy1fk6PjpZ95F27J:file-ByYgX700b80gf4ZY1GxvF3Jv" -mokacan_vardict_reference_stage = " -istage-FPzGjgj0jy1Q2JJF2zYx5J5k.ref_genome=project-ByfFPz00jy1fk6PjpZ95F27J:file-ByYgX700b80gf4ZY1GxvF3Jv" -mokacan_varscan_reference_stage = " -istage-FPzGjp80jy1V3Jvb5z6xfpfZ.ref_genome=project-ByfFPz00jy1fk6PjpZ95F27J:file-ByYgX700b80gf4ZY1GxvF3Jv" - mokaamp_email_message = ( "If both MokaAMP and MokaOnc (amplivar) have been run," "please record the version of MokaOnc used." @@ -621,7 +586,6 @@ archer_panel_list = ["Pan4396", "Pan5113", "Pan5115"] swift_57G_panel_list = ["Pan4082"] swift_egfr_panel_list = ["Pan4081"] -mokacan_panel_list = ["Pan4579", "Pan4574"] LRPCR_panel_list = [ "Pan5007", "Pan5008", @@ -651,7 +615,6 @@ "joint_variant_calling": False, "mokaamp": False, "capture_type": "Hybridisation", # "Amplicon" or "Hybridisation" - "mokacan": False, "mokasnp": False, "mokapipe": False, "mokapipe_haplotype_caller_padding": 0, @@ -1216,14 +1179,12 @@ "congenica_upload": False, }, "Pan4574": { # somatic VCP2 M1.2 - "mokacan": True, "congenica_upload": False, "variant_calling_bedfile": "Pan4577data.bed", "hsmetrics_bedfile": "Pan5123data.bed", "clinical_coverage_depth": 200, }, "Pan4579": { # somatic VCP2 M1.1 - "mokacan": True, "congenica_upload": False, "variant_calling_bedfile": "Pan4578data.bed", "hsmetrics_bedfile": "Pan5123data.bed", diff --git a/upload_and_setoff_workflows.py b/upload_and_setoff_workflows.py index ad6701a0..ab17ccd1 100644 --- a/upload_and_setoff_workflows.py +++ b/upload_and_setoff_workflows.py @@ -204,10 +204,6 @@ def __init__(self, runfolder, now, debug_mode=False): "jobid=$(dx run %s%s --priority high -y --name " % (config.app_project, config.mokaamp_path) ) - self.mokacan_command = ( - "jobid=$(dx run %s%s --priority high -y --name " - % (config.app_project, config.mokacan_path) - ) self.decision_support_preperation = "analysisid=$(python %s -a " % ( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -1458,12 +1454,6 @@ def start_building_dx_run_cmds(self): ) commands_list.append(self.add_to_depends_list(fastq, 'depends_list')) - if self.panel_dictionary[panel]["mokacan"]: - commands_list.append( - self.create_mokacan_command(fastq, panel) - ) - commands_list.append(self.add_to_depends_list(fastq, 'depends_list')) - # if panel is to be processed using mokasnp if self.panel_dictionary[panel]["mokasnp"]: commands_list.append( @@ -2010,61 +2000,6 @@ def create_mokaamp_command(self, fastq, pannumber): ) return dx_command - def create_mokacan_command(self, fastq, pannumber): - """ - Input = R1 fastq file name and pan number for a single sample - Returns = dx run command for MokaCAN (string) - """ - # build nexus fastq paths - returns tuple for read1 and read2 and dictionary for bed files - fastqs = self.nexus_fastq_paths(fastq) - bedfiles = self.nexus_bedfiles(pannumber) - - # create the MokaCAN dx command - dx_command_list = [ - self.mokacan_command, - fastqs[2], - config.mokacan_fastqc_r1_stage, - fastqs[0], - config.mokacan_fastqc_r2_stage, - fastqs[1], - config.mokacan_sentieon_sample_name_stage, - fastqs[2], - config.mokacan_picard_bedfile_stage, - bedfiles["hsmetrics"], - config.mokacan_picard_capturetype_stage, - self.panel_dictionary[pannumber]["capture_type"], - config.mokacan_sambamba_coverage_level_stage, - self.panel_dictionary[pannumber]["clinical_coverage_depth"], - config.mokacan_sambamba_bedfile_stage, - bedfiles["sambamba"], - config.mokacan_vardict_bedfile_stage, - bedfiles["variant_calling_bedfile"], - config.mokacan_varscan_bedfile_stage, - bedfiles["variant_calling_bedfile"], - config.mokacan_vardict_sample_name_stage, - fastqs[2], - config.mokacan_senteion_bwa_reference_stage, - config.mokacan_senteion_reference_stage, - config.mokacan_picard_reference_stage, - config.mokacan_vardict_reference_stage, - config.mokacan_varscan_reference_stage, - self.dest, - self.dest_cmd, - self.token, - ] - - # Variables from dx_command_list are read from config file as various atomic types. Convert - # to string and join to create dx_command. - dx_command = "".join(map(str, dx_command_list)) - # remove the bit that adds the job to the depends on list for the negative control as varscan - # fails on near empty/-empty BAM files - # and this will stop multiqc etc running - if "NTCcon" in fastqs[0]: - dx_command = dx_command.replace("jobid=$(", "").replace( - config.Nexus_API_Key + ")", config.Nexus_API_Key - ) - return dx_command - def prepare_rpkm_list(self, rpkm_list): """ Input = a list of panels which requires RPKM analysis @@ -2469,14 +2404,6 @@ def write_opms_queries_custom_panel(self): self.runfolder_obj.runfolder_name, ) ) - elif self.panel_dictionary[pannumber]["mokacan"]: - queries.append( - query.format( - str(fastq.split("_")[2]), - config.mokacan_pipeline_ID, - self.runfolder_obj.runfolder_name, - ) - ) if queries: # add workflow to sql dictionary From dfe74a105be8dc20e3538838a707ec6277e04a00 Mon Sep 17 00:00:00 2001 From: MokaGuys Date: Fri, 4 Aug 2023 10:10:39 +0100 Subject: [PATCH 12/16] added congenica projects --- automate_demultiplex_config.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index bdceb4a9..d6c5565d 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -724,8 +724,8 @@ "RPKM_bedfile_pan_number": "Pan5109", "RPKM_also_analyse": vcp2_panel_list, "congenica_credentials": "STG", - "congenica_IR_template": "non-priority", #TODO check if priority is enabled - "congenica_project": "",#waiting on monogenics, + "congenica_IR_template": "non-priority", + "congenica_project": "14629", "hsmetrics_bedfile": "Pan5123data.bed", "variant_calling_bedfile": "Pan5119data.bed", "sambamba_bedfile": "Pan5123dataSambamba.bed", @@ -736,8 +736,8 @@ "RPKM_bedfile_pan_number": "Pan5109", "RPKM_also_analyse": vcp2_panel_list, "congenica_credentials": "STG", - "congenica_IR_template": "non-priority", # TO DO check if priority is enabled - "congenica_project": "",#waiting on monogenics, + "congenica_IR_template": "non-priority", + "congenica_project": "14630", "hsmetrics_bedfile": "Pan5123data.bed", "variant_calling_bedfile": "Pan5119data.bed", "sambamba_bedfile": "Pan5123dataSambamba.bed", @@ -1010,7 +1010,7 @@ "mokapipe": True, "multiqc_coverage_level": 30, "RPKM_bedfile_pan_number": "Pan5109", - "congenica_project": "12814", + "congenica_project": "14563", "RPKM_also_analyse": vcp2_panel_list, "hsmetrics_bedfile": "Pan5123data.bed", "sambamba_bedfile": "Pan5123dataSambamba.bed", @@ -1020,7 +1020,7 @@ "mokapipe": True, "multiqc_coverage_level": 30, "RPKM_bedfile_pan_number": "Pan5109", - "congenica_project": "12814", + "congenica_project": "14564", "RPKM_also_analyse": vcp2_panel_list, "hsmetrics_bedfile": "Pan5123data.bed", "sambamba_bedfile": "Pan5123dataSambamba.bed", From 13e14b632c36ba71999283fe19f7400a4e808a6b Mon Sep 17 00:00:00 2001 From: RachelDuffin Date: Mon, 7 Aug 2023 12:21:10 +0100 Subject: [PATCH 13/16] Make MultiQC dependent on non-NTC TSO sample coverage jobs for inclusion in MultiQC report --- upload_and_setoff_workflows.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/upload_and_setoff_workflows.py b/upload_and_setoff_workflows.py index ab17ccd1..d171b676 100644 --- a/upload_and_setoff_workflows.py +++ b/upload_and_setoff_workflows.py @@ -247,7 +247,7 @@ def __init__(self, runfolder, now, debug_mode=False): self.depends_list_gatk = 'depends_list_gatk="${depends_list_gatk} -d ${jobid} "' self.depends_list_recombined = 'depends_list="${depends_list} ${depends_list_gatk} "' # Argument to define depends_list only if the job ID exists - self.if_jobid_exists_depends ='if ! [ -z "${jobid}" ]; then %s; fi' + self.if_jobid_exists_depends = 'if ! [ -z "${jobid}" ]; then %s; fi' # command to restart upload agent part 1 self.restart_ua_1 = "ua_status=1; while [ $ua_status -ne 0 ]; do " @@ -1507,10 +1507,16 @@ def start_building_dx_run_cmds(self): # is not empty commands_list.append(self.if_jobid_exists_depends % self.add_to_depends_list(sample, 'depends_list')) + commands_list.append(self.create_sambamba_cmd(sample, pannumber)) + # Exclude negative controls from the depends list as the NTC + # coverage calculation can often fail. We want the coverage + # report for the NTC sample to help assess contamination. + # Only add to depends_list if job ID from previous command + # is not empty + commands_list.append(self.if_jobid_exists_depends % self.add_to_depends_list(sample, 'depends_list')) + if "HD200" in sample: - commands_list.append( - self.create_sompy_cmd(sample, pannumber) - ) + commands_list.append(self.create_sompy_cmd(sample, pannumber)) # Only add to depends_list if job ID from previous command # is not empty commands_list.append(self.if_jobid_exists_depends % self.add_to_depends_list("sompy", 'depends_list')) @@ -1521,20 +1527,6 @@ def start_building_dx_run_cmds(self): commands_list.append(self.add_to_depends_list("UploadMultiQC", 'depends_list')) # setoff the below commands later as they are not depended upon by # MultiQC but are required for duty_csv - - if TSO500: - for sample in self.list_of_processed_samples: - pannumber = re.search(r"Pan\d+", sample).group() - commands_list.append( - self.create_sambamba_cmd(sample, pannumber) - ) - # Exclude negative controls from the depends list as the NTC - # coverage calculation can often fail. We want the coverage - # report for the NTC sample to help assess contamination. - # Only add to depends_list if job ID from previous command - # is not empty - commands_list.append(self.if_jobid_exists_depends % self.add_to_depends_list(sample, 'depends_list')) - if rpkm_list: # Create a set of RPKM numbers for one command per panel # pass this list into function which takes into account panels @@ -2172,7 +2164,8 @@ def add_to_depends_list(self, fastq, depends_type): However, some jobs should be excluded from the depends list, eg negative controls Returns = command which adds jobid to the bash string (string) """ - if "NTCcon" in fastq: + ntcon_strings = ["00000", "NTCcon", "NTC000", "NC000"] + if any(identifier in fastq for identifier in ntcon_strings): return None elif depends_type=='depends_list': return self.depends_list From c55857539eb0d8ce82b0b40bd76f59fe45c92e29 Mon Sep 17 00:00:00 2001 From: MokaGuys Date: Tue, 8 Aug 2023 10:28:51 +0100 Subject: [PATCH 14/16] Set Testing to False --- automate_demultiplex_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index d6c5565d..1f63f58d 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -8,7 +8,7 @@ import os # Set debug mode -testing = True +testing = False # =====location of input/output files===== # root of folder that contains the apps, automate_demultiplexing_logfiles and From 4513ac72b882b9246ca7bf3e56d4ac137a443b1e Mon Sep 17 00:00:00 2001 From: RachelDuffin Date: Wed, 9 Aug 2023 10:17:23 +0100 Subject: [PATCH 15/16] Remove obsolete output parser comments --- automate_demultiplex_config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index 1f63f58d..e3b8b1d2 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -1192,7 +1192,7 @@ }, "Pan4969": { # TSO500 no UTRs. TERT promoter "TSO500": True, - "sambamba_bedfile": "Pan5130dataSambamba.bed", # NOTE All TSO500 output parser settings are currently taken from the first pan number listed in tso500_panel_list + "sambamba_bedfile": "Pan5130dataSambamba.bed", "clinical_coverage_depth": 100, "multiqc_coverage_level": 100, "coverage_min_basecall_qual": 25, @@ -1201,7 +1201,7 @@ "Pan5085": { # TSO500 High throughput Synnovis. no UTRs. TERT promoter "TSO500": True, "TSO500_high_throughput": True, - "sambamba_bedfile": "Pan5130dataSambamba.bed", # NOTE All TSO500 output parser settings are currently taken from the first pan number listed in tso500_panel_list + "sambamba_bedfile": "Pan5130dataSambamba.bed", "clinical_coverage_depth": 100, "multiqc_coverage_level": 100, "coverage_min_basecall_qual": 25, @@ -1210,7 +1210,7 @@ "Pan5112": { # TSO500 High throughput BSPS. no UTRs. TERT promoter "TSO500": True, "TSO500_high_throughput": True, - "sambamba_bedfile": "Pan5130dataSambamba.bed", # NOTE All TSO500 output parser settings are currently taken from the first pan number listed in tso500_panel_list + "sambamba_bedfile": "Pan5130dataSambamba.bed", "clinical_coverage_depth": 100, "multiqc_coverage_level": 100, "coverage_min_basecall_qual": 25, @@ -1220,7 +1220,7 @@ "Pan5114": { # TSO500 High throughput Control. no UTRs. TERT promoter "TSO500": True, "TSO500_high_throughput": True, - "sambamba_bedfile": "Pan5130dataSambamba.bed", # NOTE All TSO500 output parser settings are currently taken from the first pan number listed in tso500_panel_list + "sambamba_bedfile": "Pan5130dataSambamba.bed", "clinical_coverage_depth": 100, "multiqc_coverage_level": 100, "coverage_min_basecall_qual": 25, From aa578fb88f23d4c914938b207bb1a22610c31a26 Mon Sep 17 00:00:00 2001 From: RachelDuffin Date: Wed, 9 Aug 2023 12:22:34 +0100 Subject: [PATCH 16/16] Remove MokaCAN pan numbers --- automate_demultiplex_config.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index e3b8b1d2..8a213bf0 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -424,8 +424,6 @@ "Pan5085", # TSO500 High throughput Synnovis. no UTRS TERT promoter "Pan5112", # TSO500 High throughput BSPS. no UTRS TERT promoter "Pan5114", # TSO500 High throughput Control. no UTRS TERT promoter - "Pan4579", # VCP2 M1.1 (somatic) - "Pan4574", # VCP2 M1.2 (somatic) "Pan4042", # STG VCP2 BRCA - TO BE REMOVED IN FUTURE UPDATE "Pan4043", # STG VCP3 - TO BE REMOVED IN FUTURE UPDATE "Pan4044", # STG VCP1 - TO BE REMOVED IN FUTURE UPDATE @@ -1178,18 +1176,6 @@ "archerdx": True, "congenica_upload": False, }, - "Pan4574": { # somatic VCP2 M1.2 - "congenica_upload": False, - "variant_calling_bedfile": "Pan4577data.bed", - "hsmetrics_bedfile": "Pan5123data.bed", - "clinical_coverage_depth": 200, - }, - "Pan4579": { # somatic VCP2 M1.1 - "congenica_upload": False, - "variant_calling_bedfile": "Pan4578data.bed", - "hsmetrics_bedfile": "Pan5123data.bed", - "clinical_coverage_depth": 200, - }, "Pan4969": { # TSO500 no UTRs. TERT promoter "TSO500": True, "sambamba_bedfile": "Pan5130dataSambamba.bed",