diff --git a/automate_demultiplex_config.py b/automate_demultiplex_config.py index 08f1c139..9bb00798 100644 --- a/automate_demultiplex_config.py +++ b/automate_demultiplex_config.py @@ -42,7 +42,7 @@ # path to log file which records the output of the upload agent upload_and_setoff_workflow_logfile = ( - "{document_root}/automate_demultiplexing_logfiles/upload_agent_script_logfiles/" + "{document_root}/automate_demultiplexing_logfiles/upload_agent_script_logfiles/" ).format(document_root=document_root) # name of log file which records the output of the upload agent @@ -53,23 +53,23 @@ # Path to DNA Nexus run command log file DNA_Nexus_workflow_logfolder = ( - "{document_root}/automate_demultiplexing_logfiles/dx_run_commands/" + "{document_root}/automate_demultiplexing_logfiles/dx_run_commands/" ).format(document_root=document_root) # log folder containing project creation logs DNA_Nexus_project_creation_logfolder = ( - "{document_root}/automate_demultiplexing_logfiles/nexus_project_creation_scripts" - "/create_nexus_project_" + "{document_root}/automate_demultiplexing_logfiles/nexus_project_creation_scripts" + "/create_nexus_project_" ).format(document_root=document_root) # folder containing demultiplex logs demultiplex_logfiles = ( - "{document_root}/automate_demultiplexing_logfiles/Demultiplexing_log_files/" + "{document_root}/automate_demultiplexing_logfiles/Demultiplexing_log_files/" ).format(document_root=document_root) # path to upload agent upload_agent_path = ("{document_root}/apps/dnanexus-upload-agent-1.5.17-linux/ua").format( - document_root=document_root + document_root=document_root ) upload_agent_test_command = " --version" @@ -77,13 +77,13 @@ # path to backup_runfolder script backup_runfolder_script = ( - "/usr/local/src/mokaguys/apps/workstation_housekeeping/backup_runfolder.py" + "/usr/local/src/mokaguys/apps/workstation_housekeeping/backup_runfolder.py" ) # backup runfolder folder backup_runfolder_logfile = ( - 
"{document_root}/automate_demultiplexing_logfiles" - "/backup_runfolder_logfiles" + "{document_root}/automate_demultiplexing_logfiles" + "/backup_runfolder_logfiles" ).format(document_root=document_root) backup_runfolder_success = "backup_runfolder INFO - END" @@ -94,6 +94,11 @@ # expected result from testing dx_sdk_test_expected_stdout = "dx v0.2" +# command and output to test agilent connector +agilent_connector_cmd = "/opt/agilent/agilentserviceconnector status" +agilent_connector_output = "agilentserviceconnector is running" + +# upload agent test response upload_agent_expected_stdout = "Upload Agent Version:" # =====Moka settings===== @@ -122,7 +127,8 @@ project_success = 'Created new project called "%s"' # The project containing the app and data -app_project = "001_ToolsReferenceData:/" +#app_project = "001_ToolsReferenceData:/" +app_project = "project-ByfFPz00jy1fk6PjpZ95F27J:/" # path to the workflow in the app project mokapipe_path = "Workflows/GATK3.5_v2.12" @@ -166,7 +172,7 @@ mokapipe_bwa_rg_sample = " -istage-Byz9BJ80jy1k2VB9xVXBp0Fg.read_group_sample=" # bwa rg samplename mokapipe_sambamba_input = " -istage-F35zBKQ0jy1XpfzYPZY4bgX6.sambamba_bed=" # Sambamba Bed file mokapipe_mokapicard_vendorbed_input = ( - " -istage-F9GK4QQ0jy1qj14PPZxxq3VG.vendor_exome_bedfile=" # HSMetrics Bed file + " -istage-F9GK4QQ0jy1qj14PPZxxq3VG.vendor_exome_bedfile=" # HSMetrics Bed file ) mokapipe_haplotype_padding_input = " -i" +mokapipe_gatk_human_exome_stage + ".padding=" mokapipe_haplotype_bedfile_input = " -i" +mokapipe_gatk_human_exome_stage + ".bedfile=" @@ -207,8 +213,8 @@ mokaamp_mpileup_cov_level_stage = " -istage-FxypXb807p1zj3g8Jv45Y54P.min_coverage=" mokaamp_email_message = ( - "If both MokaAMP and MokaOnc (amplivar) have been run," - "please record the version of MokaOnc used." + "If both MokaAMP and MokaOnc (amplivar) have been run," + "please record the version of MokaOnc used." 
) # Peddy @@ -232,7 +238,7 @@ # emails addresses for Ingenuity oncology_IVA_email = "gst-tr.oncology.interpret@nhs.net" # general oncology email interpretation_request_email = ( - "gst-tr.interpretation.request@nhs.net" # email for Interpretation_requests + "gst-tr.interpretation.request@nhs.net" # email for Interpretation_requests ) wes_email_address = "gst-tr.wesviapath@nhs.net" # WES email @@ -243,7 +249,7 @@ # DNA Nexus authentication token nexus_api_key_file = "{document_root}/.dnanexus_auth_token".format(document_root=document_root) with open(nexus_api_key_file, "r") as nexus_api: - Nexus_API_Key = nexus_api.readline().rstrip() + Nexus_API_Key = nexus_api.readline().rstrip() # list of DNA Nexus users with view access to project view_users = ["org-viapath_prod", "InterpretationRequest"] @@ -262,34 +268,35 @@ # =====List of all panel numbers===== panel_list = [ - "Pan1190", # swift EGFR - "Pan2684", # swift 57 - "Pan2835", # twist WES - "Pan4042", # STG VCP2 BRCA - "Pan4043", # STG VCP3 - "Pan4044", # STG VCP1 - "Pan4049", # STG VCP2 CrCa - "Pan3174", # WES trio - #"Pan4119", # VCP1 Viapath FH - #"Pan4121", # VCP1 Viapath CF - #"Pan4122", # VCP1 Viapath FGFR - #"Pan4125", # VCP1 Viapath DMD - #"Pan4126", # VCP1 Viapath CADASIL - "Pan4145", # VCP3 Viapath CMD - "Pan4146", # VCP3 Viapath CM - "Pan4149", # VCP2 Viapath BRCA - #"Pan4150", # VCP2 Viapath ovarian - #"Pan4127", # VCP2 Viapath colorectal - #"Pan4129", # VCP2 Viapath lynch - #"Pan4130", # VCP2 Viapath polyposis - #"Pan4132", # VCP3 Viapath R56 - #"Pan4134", # VCP3 Viapath R57 - #"Pan4136", # VCP3 Viapath R58 - #"Pan4137", # VCP3 Viapath R60 - #"Pan4138", # VCP3 Viapath R62 - #"Pan4143", # VCP3 Viapath R66 - #"Pan4144", # VCP3 Viapath R78 - #"Pan4151" # VCP3 Viapath R82 + "Pan1190", # swift EGFR + "Pan2684", # swift 57 + "Pan2835", # twist WES + "Pan4042", # STG VCP2 BRCA + "Pan4043", # STG VCP3 + "Pan4044", # STG VCP1 + "Pan4049", # STG VCP2 CrCa + "Pan3174", # WES trio + "Pan4119", # VCP1 Viapath FH + 
"Pan4121", # VCP1 Viapath CF + "Pan4122", # VCP1 Viapath FGFR + "Pan4125", # VCP1 Viapath DMD + "Pan4126", # VCP1 Viapath CADASIL + "Pan4145", # VCP3 Viapath CMD + "Pan4146", # VCP3 Viapath CM + "Pan4149", # VCP2 Viapath BRCA + "Pan4150", # VCP2 Viapath ovarian + "Pan4127", # VCP2 Viapath colorectal + "Pan4129", # VCP2 Viapath lynch + "Pan4130", # VCP2 Viapath polyposis + "Pan4132", # VCP3 Viapath R56 + "Pan4134", # VCP3 Viapath R57 + "Pan4136", # VCP3 Viapath R58 + "Pan4137", # VCP3 Viapath R60 + "Pan4138", # VCP3 Viapath R62 + "Pan4143", # VCP3 Viapath R66 + "Pan4144", # VCP3 Viapath R78 + "Pan4151", # VCP3 Viapath R82 + "Pan2764" # OnePGT ] @@ -299,321 +306,325 @@ vcp3_panel_list = ["Pan4132","Pan4134","Pan4136","Pan4137","Pan4138","Pan4143","Pan4144","Pan4145","Pan4146","Pan4151","Pan4043"] default_panel_properties = { - "UMI": False, - "UMI_bcl2fastq": None, # eg Y145,I8,Y9I8,Y145 - "RPKM_bedfile_pan_number": None, - "RPKM_also_analyse": None, # List of Pan Numbers indicating which BAM files to download - "onePGT": False, - "mokawes": False, - "joint_variant_calling": False, - "mokaamp": False, - "capture_type": "Hybridisation", # "Amplicon" or "Hybridisation" - "mokaonc": False, - "mokapipe": False, - "mokapipe_haplotype_caller_padding": 0, - "mokaamp_varscan_strandfilter": True, - "iva_upload": False, - "congenica_upload": True, - "STG": False, - "oncology": False, - "congenica_credentials": "Viapath", # "Viapath" OR "STG" - "congenica_IR_template": "priority", # 'priority' or 'non-priority' - "clinical_coverage_depth": None, # only found in mokamp command - "multiqc_coverage_level": 30, - # Note: hsmetrics_bedfile only used when BED file name differs from Pan number - "hsmetrics_bedfile": None, - # Note: variant_calling_bedfile only used when BED file differs from Pan number - "variant_calling_bedfile": None, - # Note: sambamba_bedfile only used when BED file differs from Pan number - "sambamba_bedfile": None, - "ingenuity_email": 
interpretation_request_email, - "congenica_project": None, - "peddy": False, + "UMI": False, + "UMI_bcl2fastq": None, # eg Y145,I8,Y9I8,Y145 + "RPKM_bedfile_pan_number": None, + "RPKM_also_analyse": None, # List of Pan Numbers indicating which BAM files to download + "onePGT": False, + "mokawes": False, + "joint_variant_calling": False, + "mokaamp": False, + "capture_type": "Hybridisation", # "Amplicon" or "Hybridisation" + "mokaonc": False, + "mokapipe": False, + "mokapipe_haplotype_caller_padding": 0, + "mokaamp_varscan_strandfilter": True, + "iva_upload": False, + "congenica_upload": True, + "STG": False, + "oncology": False, + "congenica_credentials": "Viapath", # "Viapath" OR "STG" + "congenica_IR_template": "priority", # 'priority' or 'non-priority' + "clinical_coverage_depth": None, # only found in mokamp command + "multiqc_coverage_level": 30, + # Note: hsmetrics_bedfile only used when BED file name differs from Pan number + "hsmetrics_bedfile": None, + # Note: variant_calling_bedfile only used when BED file differs from Pan number + "variant_calling_bedfile": None, + # Note: sambamba_bedfile only used when BED file differs from Pan number + "sambamba_bedfile": None, + "ingenuity_email": interpretation_request_email, + "congenica_project": None, + "peddy": False, } # override default panel settings panel_settings = { - "Pan2835": { # TWIST WES at GSTT - "mokawes": True, - "multiqc_coverage_level": 20, - "hsmetrics_bedfile": "Twist_Exome_RefSeq_CCDS_v1.2_targets.bed", - "sambamba_bedfile": "Pan493dataSambamba.bed", - "peddy": True, - }, - "Pan3174": { # TWIST WES TRIO at GSTT - "mokawes": True, - "multiqc_coverage_level": 20, - "hsmetrics_bedfile": "Twist_Exome_RefSeq_CCDS_v1.2_targets.bed", - "sambamba_bedfile": "Pan493dataSambamba.bed", - "peddy": True, - }, - "Pan1190": { # EGFR SWIFT Panel - "oncology": True, - "mokaonc": True, - "capture_type": "Amplicon", - "clinical_coverage_depth": 1000, - "multiqc_coverage_level": 100 - }, - "Pan2684": { # 57G SWIFT 
panel - "RPKM_bedfile_pan_number": None, - "mokaamp": True, - "oncology": True, - "capture_type": "Amplicon", - "clinical_coverage_depth": 600, # only found in mokamp command - "multiqc_coverage_level": 100, - }, - "Pan4044": { # VCP1 STG - "mokapipe": True, - "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3624", - "RPKM_also_analyse": vcp1_panel_list, - "congenica_credentials": "STG", - "congenica_IR_template":"non-priority", - "congenica_project": "4203", - "hsmetrics_bedfile": "Pan4003data.bed", - "variant_calling_bedfile": "Pan4003data.bed", - "sambamba_bedfile": "Pan4003dataSambamba.bed", - "STG": True, - }, - "Pan4042": { # VCP2 STG BRCA - "mokapipe": True, - "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3614", - "RPKM_also_analyse": vcp2_panel_list, - "congenica_credentials": "STG", - "congenica_IR_template":"non-priority", - "congenica_project": "1099", - "mokapipe_haplotype_caller_padding":1, - "hsmetrics_bedfile": "Pan4011data.bed", - "variant_calling_bedfile": "Pan4011data.bed", - "sambamba_bedfile": "Pan4011dataSambamba.bed", - }, - "Pan4049": { # VCP2 STG CrCa - "mokapipe": True, - "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3614", - "RPKM_also_analyse": vcp2_panel_list, - "congenica_credentials": "STG", - "congenica_IR_template":"non-priority", - "congenica_project": "4202", - "mokapipe_haplotype_caller_padding":1, - "hsmetrics_bedfile": "Pan4011data.bed", - "variant_calling_bedfile": "Pan4011data.bed", - "sambamba_bedfile": "Pan4011dataSambamba.bed", - }, - "Pan4043": { # VCP3 STG - "mokapipe": True, - "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", - "RPKM_also_analyse": vcp3_panel_list, - "congenica_credentials": "STG", - "congenica_IR_template":"non-priority", - "congenica_project": "4201", - "mokapipe_haplotype_caller_padding":1, - "hsmetrics_bedfile": "Pan4114data.bed", - "variant_calling_bedfile": "Pan4114data.bed", - "sambamba_bedfile": "Pan4114dataSambamba.bed", - }, - 
# "Pan4119": { #VCP1 R134_Familial hypercholesterolaemia-Familial hypercholesterolaemia Small panel (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3624", - # "congenica_project": "4666", - # "RPKM_also_analyse": vcp1_panel_list, - # "hsmetrics_bedfile": "Pan4003data.bed", - # "sambamba_bedfile": "Pan4003dataSambamba.bed", - # "variant_calling_bedfile": "Pan4118data.bed", - # }, - # "Pan4121": { #VCP1 R184 CF (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3624", - # "congenica_project": "4862", - # "RPKM_also_analyse": vcp1_panel_list, - # "hsmetrics_bedfile": "Pan4003data.bed", - # "sambamba_bedfile": "Pan4003dataSambamba.bed", - # "variant_calling_bedfile": "Pan4118data.bed", - # }, - # "Pan4122": { #VCP1 R25 FGFR Viapath - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3624", - # "congenica_project": "4863", - # "RPKM_also_analyse": vcp1_panel_list, - # "hsmetrics_bedfile": "Pan4003data.bed", - # "sambamba_bedfile": "Pan4003dataSambamba.bed", - # "variant_calling_bedfile": "Pan4118data.bed", - # }, - # "Pan4125": { #VCP1 R73 DMD (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3624", - # "congenica_project": "4861", - # "RPKM_also_analyse": vcp1_panel_list, - # "hsmetrics_bedfile": "Pan4003data.bed", - # "sambamba_bedfile": "Pan4003dataSambamba.bed", - # "variant_calling_bedfile": "Pan4118data.bed", - # }, - # "Pan4126": { #VCP1 R337_CADASIL Viapath - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3624", - # "congenica_project": "TBC", - # "RPKM_also_analyse": vcp1_panel_list, - # "hsmetrics_bedfile": "Pan4003data.bed", - # "sambamba_bedfile": "Pan4003dataSambamba.bed", - # "variant_calling_bedfile": "Pan4118data.bed", - # }, - "Pan4149": { #VCP2 BRCA (Viapath) - "mokapipe": True, - "multiqc_coverage_level": 30, - 
"RPKM_bedfile_pan_number": "Pan3614", - "congenica_project": "4665", - "RPKM_also_analyse": vcp2_panel_list, - "hsmetrics_bedfile": "Pan4011data.bed", - "sambamba_bedfile": "Pan4148dataSambamba.bed", - "variant_calling_bedfile": "Pan4090data.bed", - }, - # "Pan4150": { #VCP2 R207 ovarian cancer (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3614", - # "congenica_project": "4864", - # "RPKM_also_analyse": vcp2_panel_list, - # "hsmetrics_bedfile": "Pan4011data.bed", - # "sambamba_bedfile": "PanXXXdataSambamba.bed", - # "variant_calling_bedfile": "PanXXXdata.bed", - # }, - # "Pan4127": { #VCP2 R209 colorectal cancer (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3614", - # "congenica_project": "TBC", - # "RPKM_also_analyse": vcp2_panel_list, - # "hsmetrics_bedfile": "Pan4011data.bed", - # "sambamba_bedfile": "PanXXXdataSambamba.bed", - # "variant_calling_bedfile": "PanXXXdata.bed", - # }, - # "Pan4129": { #VCP2 R210 Lynch syndrome (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3614", - # "congenica_project": "TBC", - # "RPKM_also_analyse": vcp2_panel_list, - # "hsmetrics_bedfile": "Pan4011data.bed", - # "sambamba_bedfile": "PanXXXdataSambamba.bed", - # "variant_calling_bedfile": "PanXXXdata.bed", - # }, - # "Pan4130": { #VCP2 R211 polyposis (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3614", - # "congenica_project": "TBC", - # "RPKM_also_analyse": vcp2_panel_list, - # "hsmetrics_bedfile": "Pan4011data.bed", - # "sambamba_bedfile": "PanXXXdataSambamba.bed", - # "variant_calling_bedfile": "PanXXXdata.bed", - # }, - # "Pan4132": { #VCP3 R56 (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3974", - # "congenica_project": "TBC", - # "RPKM_also_analyse": vcp3_panel_list, - # "hsmetrics_bedfile": 
"Pan4114data.bed", - # "sambamba_bedfile": "Pan4114dataSambamba.bed", - # "variant_calling_bedfile": "Pan4114data.bed", - # }, - # "Pan4134": { #VCP3 R57 (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3974", - # "congenica_project": "TBC", - # "RPKM_also_analyse": vcp3_panel_list, - # "hsmetrics_bedfile": "Pan4114data.bed", - # "sambamba_bedfile": "Pan4114dataSambamba.bed", - # "variant_calling_bedfile": "Pan4114data.bed", - # }, - # "Pan4136": { #VCP3 R58 (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3974", - # "congenica_project": "TBC", - # "RPKM_also_analyse": vcp3_panel_list, - # "hsmetrics_bedfile": "Pan4114data.bed", - # "sambamba_bedfile": "Pan4114dataSambamba.bed", - # "variant_calling_bedfile": "Pan4114data.bed", - # }, - # "Pan4137": { #VCP3 R60 (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3974", - # "congenica_project": "TBC", - # "RPKM_also_analyse": vcp3_panel_list, - # "hsmetrics_bedfile": "Pan4114data.bed", - # "sambamba_bedfile": "Pan4114dataSambamba.bed", - # "variant_calling_bedfile": "Pan4114data.bed", - # }, - # "Pan4138": { #VCP3 R62 (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3974", - # "congenica_project": "TBC", - # "RPKM_also_analyse": vcp3_panel_list, - # "hsmetrics_bedfile": "Pan4114data.bed", - # "sambamba_bedfile": "Pan4114dataSambamba.bed", - # "variant_calling_bedfile": "Pan4114data.bed", - # }, - # "Pan4143": { #VCP3 R66 (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3974", - # "congenica_project": "TBC", - # "RPKM_also_analyse": vcp3_panel_list, - # "hsmetrics_bedfile": "Pan4114data.bed", - # "sambamba_bedfile": "Pan4114dataSambamba.bed", - # "variant_calling_bedfile": "Pan4114data.bed", - # }, - # "Pan4144": { #VCP3 R78 (Viapath) - # "mokapipe": True, - # 
"multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3974", - # "congenica_project": "TBC", - # "RPKM_also_analyse": vcp3_panel_list, - # "hsmetrics_bedfile": "Pan4114data.bed", - # "sambamba_bedfile": "Pan4114dataSambamba.bed", - # "variant_calling_bedfile": "Pan4114data.bed", - # }, - "Pan4145": { #VCP3 R79 - CMD (Viapath) - "mokapipe": True, - "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", - "congenica_project": "4666", - "RPKM_also_analyse": vcp3_panel_list, - "hsmetrics_bedfile": "Pan4114data.bed", - "sambamba_bedfile": "Pan4114dataSambamba.bed", - "variant_calling_bedfile": "Pan4114data.bed", - }, - "Pan4146": { #VCP3 R81 CM (Viapath) - "mokapipe": True, - "multiqc_coverage_level": 30, - "RPKM_bedfile_pan_number": "Pan3974", - "congenica_project": "4666", - "RPKM_also_analyse": vcp3_panel_list, - "hsmetrics_bedfile": "Pan4114data.bed", - "sambamba_bedfile": "Pan4114dataSambamba.bed", - "variant_calling_bedfile": "Pan4114data.bed", - }, - # "Pan4151": { #VCP3 R82 limb girdle (Viapath) - # "mokapipe": True, - # "multiqc_coverage_level": 30, - # "RPKM_bedfile_pan_number": "Pan3974", - # "congenica_project": "TBC", - # "RPKM_also_analyse": vcp3_panel_list, - # "hsmetrics_bedfile": "Pan4114data.bed", - # "sambamba_bedfile": "Pan4114dataSambamba.bed", - # "variant_calling_bedfile": "Pan4114data.bed", - # }, + "Pan2835": { # TWIST WES at GSTT + "mokawes": True, + "multiqc_coverage_level": 20, + "hsmetrics_bedfile": "Twist_Exome_RefSeq_CCDS_v1.2_targets.bed", + "sambamba_bedfile": "Pan493dataSambamba.bed", + "peddy": True, + }, + "Pan3174": { # TWIST WES TRIO at GSTT + "mokawes": True, + "multiqc_coverage_level": 20, + "hsmetrics_bedfile": "Twist_Exome_RefSeq_CCDS_v1.2_targets.bed", + "sambamba_bedfile": "Pan493dataSambamba.bed", + "peddy": True, + }, + "Pan1190": { # EGFR SWIFT Panel + "oncology": True, + "mokaonc": True, + "capture_type": "Amplicon", + "clinical_coverage_depth": 1000, + "multiqc_coverage_level": 100 + }, + 
"Pan2684": { # 57G SWIFT panel + "RPKM_bedfile_pan_number": None, + "mokaamp": True, + "oncology": True, + "capture_type": "Amplicon", + "clinical_coverage_depth": 600, # only found in mokamp command + "multiqc_coverage_level": 100, + }, + "Pan4044": { # VCP1 STG + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3624", + "RPKM_also_analyse": vcp1_panel_list, + "congenica_credentials": "STG", + "congenica_IR_template":"non-priority", + "congenica_project": "4203", + "hsmetrics_bedfile": "Pan4003data.bed", + "variant_calling_bedfile": "Pan4003data.bed", + "sambamba_bedfile": "Pan4003dataSambamba.bed", + "STG": True, + }, + "Pan4042": { # VCP2 STG BRCA + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3614", + "RPKM_also_analyse": vcp2_panel_list, + "congenica_credentials": "STG", + "congenica_IR_template":"non-priority", + "congenica_project": "1099", + "mokapipe_haplotype_caller_padding":1, + "hsmetrics_bedfile": "Pan4011data.bed", + "variant_calling_bedfile": "Pan4011data.bed", + "sambamba_bedfile": "Pan4011dataSambamba.bed", + }, + "Pan4049": { # VCP2 STG CrCa + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3614", + "RPKM_also_analyse": vcp2_panel_list, + "congenica_credentials": "STG", + "congenica_IR_template":"non-priority", + "congenica_project": "4202", + "mokapipe_haplotype_caller_padding":1, + "hsmetrics_bedfile": "Pan4011data.bed", + "variant_calling_bedfile": "Pan4011data.bed", + "sambamba_bedfile": "Pan4011dataSambamba.bed", + }, + "Pan4043": { # VCP3 STG + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3974", + "RPKM_also_analyse": vcp3_panel_list, + "congenica_credentials": "STG", + "congenica_IR_template":"non-priority", + "congenica_project": "4201", + "mokapipe_haplotype_caller_padding":1, + "hsmetrics_bedfile": "Pan4278data.bed", + "variant_calling_bedfile": "Pan4278data.bed", + "sambamba_bedfile": 
"Pan4278dataSambamba.bed", + }, + "Pan4119": { #VCP1 R134_Familial hypercholesterolaemia-Familial hypercholesterolaemia Small panel (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3624", + "congenica_project": "4664", + "RPKM_also_analyse": vcp1_panel_list, + "hsmetrics_bedfile": "Pan4287data.bed", + "sambamba_bedfile": "Pan4287dataSambamba.bed", + "variant_calling_bedfile": "Pan4302data.bed", + }, + "Pan4121": { #VCP1 R184 CF (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3624", + "congenica_project": "4862", + "RPKM_also_analyse": vcp1_panel_list, + "hsmetrics_bedfile": "Pan4287data.bed", + "sambamba_bedfile": "Pan4287dataSambamba.bed", + "variant_calling_bedfile": "Pan4302data.bed", + }, + "Pan4122": { #VCP1 R25 FGFR Viapath + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3624", + "congenica_project": "4863", + "RPKM_also_analyse": vcp1_panel_list, + "hsmetrics_bedfile": "Pan4287data.bed", + "sambamba_bedfile": "Pan4287dataSambamba.bed", + "variant_calling_bedfile": "Pan4302data.bed", + }, + "Pan4125": { #VCP1 R73 DMD (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3624", + "congenica_project": "4861", + "RPKM_also_analyse": vcp1_panel_list, + "hsmetrics_bedfile": "Pan4287data.bed", + "sambamba_bedfile": "Pan4287dataSambamba.bed", + "variant_calling_bedfile": "Pan4302data.bed", + }, + "Pan4126": { #VCP1 R337_CADASIL Viapath + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3624", + "congenica_project": "4865", + "RPKM_also_analyse": vcp1_panel_list, + "hsmetrics_bedfile": "Pan4287data.bed", + "sambamba_bedfile": "Pan4287dataSambamba.bed", + "variant_calling_bedfile": "Pan4302data.bed", + }, + "Pan4149": { #VCP2 BRCA (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3614", + "congenica_project": "4665", + 
"RPKM_also_analyse": vcp2_panel_list, + "hsmetrics_bedfile": "Pan4310data.bed", + "sambamba_bedfile": "Pan4310dataSambamba.bed", + "variant_calling_bedfile": "Pan4301data.bed", + }, + "Pan4150": { #VCP2 R207 ovarian cancer (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3614", + "congenica_project": "4864", + "RPKM_also_analyse": vcp2_panel_list, + "hsmetrics_bedfile": "Pan4310data.bed", + "sambamba_bedfile": "Pan4310dataSambamba.bed", + "variant_calling_bedfile": "Pan4301data.bed", + }, + "Pan4127": { #VCP2 R209 colorectal cancer (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3614", + "congenica_project": "5093", + "RPKM_also_analyse": vcp2_panel_list, + "hsmetrics_bedfile": "Pan4310data.bed", + "sambamba_bedfile": "Pan4310dataSambamba.bed", + "variant_calling_bedfile": "Pan4301data.bed", + }, + "Pan4129": { #VCP2 R210 Lynch syndrome (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3614", + "congenica_project": "5094", + "RPKM_also_analyse": vcp2_panel_list, + "hsmetrics_bedfile": "Pan4310data.bed", + "sambamba_bedfile": "Pan4310dataSambamba.bed", + "variant_calling_bedfile": "Pan4301data.bed", + }, + "Pan4130": { #VCP2 R211 polyposis (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3614", + "congenica_project": "5095", + "RPKM_also_analyse": vcp2_panel_list, + "hsmetrics_bedfile": "Pan4310data.bed", + "sambamba_bedfile": "Pan4310dataSambamba.bed", + "variant_calling_bedfile": "Pan4301data.bed", + }, + "Pan4132": { #VCP3 R56 (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3974", + "congenica_project": "5092", + "RPKM_also_analyse": vcp3_panel_list, + "hsmetrics_bedfile": "Pan4278data.bed", + "sambamba_bedfile": "Pan4278dataSambamba.bed", + "variant_calling_bedfile": "Pan4278data.bed", + }, + "Pan4134": { #VCP3 R57 (Viapath) + 
"mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3974", + "congenica_project": "5092", + "RPKM_also_analyse": vcp3_panel_list, + "hsmetrics_bedfile": "Pan4278data.bed", + "sambamba_bedfile": "Pan4278dataSambamba.bed", + "variant_calling_bedfile": "Pan4278data.bed", + }, + "Pan4136": { #VCP3 R58 (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3974", + "congenica_project": "5092", + "RPKM_also_analyse": vcp3_panel_list, + "hsmetrics_bedfile": "Pan4278data.bed", + "sambamba_bedfile": "Pan4278dataSambamba.bed", + "variant_calling_bedfile": "Pan4278data.bed", + }, + "Pan4137": { #VCP3 R60 (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3974", + "congenica_project": "5092", + "RPKM_also_analyse": vcp3_panel_list, + "hsmetrics_bedfile": "Pan4278data.bed", + "sambamba_bedfile": "Pan4278dataSambamba.bed", + "variant_calling_bedfile": "Pan4278data.bed", + }, + "Pan4138": { #VCP3 R62 (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3974", + "congenica_project": "5092", + "RPKM_also_analyse": vcp3_panel_list, + "hsmetrics_bedfile": "Pan4278data.bed", + "sambamba_bedfile": "Pan4278dataSambamba.bed", + "variant_calling_bedfile": "Pan4278data.bed", + }, + "Pan4143": { #VCP3 R66 (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3974", + "congenica_project": "5092", + "RPKM_also_analyse": vcp3_panel_list, + "hsmetrics_bedfile": "Pan4278data.bed", + "sambamba_bedfile": "Pan4278dataSambamba.bed", + "variant_calling_bedfile": "Pan4278data.bed", + }, + "Pan4144": { #VCP3 R78 (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3974", + "congenica_project": "5092", + "RPKM_also_analyse": vcp3_panel_list, + "hsmetrics_bedfile": "Pan4278data.bed", + "sambamba_bedfile": "Pan4278dataSambamba.bed", + "variant_calling_bedfile": 
"Pan4278data.bed", + }, + "Pan4145": { #VCP3 R79 - CMD (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3974", + "congenica_project": "4666", + "RPKM_also_analyse": vcp3_panel_list, + "hsmetrics_bedfile": "Pan4278data.bed", + "sambamba_bedfile": "Pan4278dataSambamba.bed", + "variant_calling_bedfile": "Pan4278data.bed", + }, + "Pan4146": { #VCP3 R81 CM (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3974", + "congenica_project": "4666", + "RPKM_also_analyse": vcp3_panel_list, + "hsmetrics_bedfile": "Pan4278data.bed", + "sambamba_bedfile": "Pan4278dataSambamba.bed", + "variant_calling_bedfile": "Pan4278data.bed", + }, + "Pan4151": { #VCP3 R82 limb girdle (Viapath) + "mokapipe": True, + "multiqc_coverage_level": 30, + "RPKM_bedfile_pan_number": "Pan3974", + "congenica_project": "5092", + "RPKM_also_analyse": vcp3_panel_list, + "hsmetrics_bedfile": "Pan4278data.bed", + "sambamba_bedfile": "Pan4278dataSambamba.bed", + "variant_calling_bedfile": "Pan4278data.bed", + }, + "Pan2764": { # OnePGT + "onePGT": True, + "congenica_upload": False + } } # =====smartsheet API===== @@ -622,10 +633,10 @@ # API key smartsheet_api_key_file = "{document_root}/.smartsheet_auth_token".format( - document_root=document_root + document_root=document_root ) with open(smartsheet_api_key_file, "r") as ss_api: - smartsheet_api_key = ss_api.readline().rstrip() + smartsheet_api_key = ss_api.readline().rstrip() # columnIds ss_title = 6197963270711172 @@ -641,8 +652,8 @@ # ================ Requests info smartsheet_request_headers = { - "Authorization": "Bearer " + smartsheet_api_key, - "Content-Type": "application/json", + "Authorization": "Bearer " + smartsheet_api_key, + "Content-Type": "application/json", } smartsheet_request_url = "https://api.smartsheet.com/2.0/sheets/" + str(smartsheet_sheetid) @@ -654,15 +665,16 @@ mokaguys_email = "gst-tr.mokaguys@nhs.net" username_file_path = 
"{document_root}/.amazon_email_username".format(document_root=document_root) with open(username_file_path, "r") as username_file: - user = username_file.readline().rstrip() + user = username_file.readline().rstrip() pw_file = "{document_root}/.amazon_email_pw".format(document_root=document_root) with open(pw_file, "r") as email_password_file: - pw = email_password_file.readline().rstrip() + pw = email_password_file.readline().rstrip() host = "email-smtp.eu-west-1.amazonaws.com" port = 587 me = "moka.alerts@gstt.nhs.uk" you = mokaguys_email oncology_you = oncology_ops_email +WES_sample_name_email_list = ["DNAdutyscientist@viapath.co.uk", "Suzanne.lillis@viapath.co.uk", mokaguys_email] smtp_do_tls = True # ================ Integrity check @@ -673,13 +685,20 @@ # statement to write when checksums match checksum_match = "Checksums match" -# ================ demultiplexing -demultiplex_success_match = r".*Processing completed with 0 errors and 0 warnings.$" -# list of sequencers which require md5 checksums from integrity check to be assessed -sequencers_with_integrity_check = ["NB551068", "NB552085"] - # ================ cluster density calculation cluster_density_success_statement = "picard.illumina.CollectIlluminaLaneMetrics done" cluster_density_error_statement = "PicardException" cluster_density_file_suffix = ".illumina_lane_metrics" phasing_metrics_file_suffix = ".illumina_phasing_metrics" +novaseq_id = "A01229" + +# ================ demultiplexing +demultiplex_success_match = r".*Processing completed with 0 errors and 0 warnings.$" +# list of sequencers which require md5 checksums from integrity check to be assessed +sequencers_with_integrity_check = ["NB551068", "NB552085", novaseq_id] + +# ================ onePGT +agilent_upload_folder = "/media/data1/share/agilent_OnePGT_uploads/" +max_filesize_in_bytes = 5368709120 # 5GB (max size is 10GB per pair of fastq) +max_filesize_in_GB = "5GB" +rsync_logfile = "rsync_output.txt" diff --git 
a/upload_and_setoff_workflows.py b/upload_and_setoff_workflows.py index af6733c9..2ea23d6a 100644 --- a/upload_and_setoff_workflows.py +++ b/upload_and_setoff_workflows.py @@ -215,7 +215,6 @@ def __init__(self, runfolder, now, debug_mode=False): # pass the dictionary created above into ADloggers class - ** unpacks this dictionary # to populate inputs. This is used as an object where various logs can be written self.loggers = adlogger.ADLoggers(**self.log_config) - def run_tests(self): """ @@ -244,6 +243,18 @@ def run_tests(self): ): raise Exception, "dx toolkit not installed" + # test agilent connector is running + if not self.test_upload_agent( + self.perform_test( + self.execute_subprocess_command( + config.agilent_connector_cmd + )[0], + "agilent_connector", + ) + ): + # don't raise exception - just raise a warning. + self.loggers.script.error("UA_fail 'Agilent connector not running - please restart'") + def quarterback(self): """ Input = None @@ -382,7 +393,11 @@ def perform_test(self, test_input, test): if test == "cluster_density": if config.cluster_density_success_statement not in test_input or config.cluster_density_error_statement in test_input: return False + if test == "agilent_connector": + if config.agilent_connector_output not in test_input: + return False return True + def test_dx_toolkit(self, test_result): """ @@ -457,7 +472,7 @@ def has_demultiplexed(self): self.loggers.script.info("Demultiplex has not been performed.") return False - def calculate_cluster_density(self,runfolder_path, runfolder_name): + def calculate_cluster_density(self, runfolder_path, runfolder_name): """ Inputs = runfolder name and runfolder path Uses a dockerised version of GATK to run picard CollectIlluminaLaneMetrics @@ -465,13 +480,20 @@ def calculate_cluster_density(self,runfolder_path, runfolder_name): If success statement seen in stderr record in log file else raise slack alert but do not stop run. 
Returns = None """ + # if novaseq need to give an extra flag to CollectIlluminaLaneMetrics + if config.novaseq_id in runfolder_name: + novaseq_flag = " --IS_NOVASEQ" + else: + novaseq_flag = "" + # docker command for tool cmd = "sudo docker run -v {}:/input_run \ broadinstitute/gatk:4.1.8.1 \ ./gatk CollectIlluminaLaneMetrics \ --RUN_DIRECTORY /input_run \ --OUTPUT_DIRECTORY /input_run \ - --OUTPUT_PREFIX {}".format(runfolder_path, runfolder_name) + --OUTPUT_PREFIX {} {}".format(runfolder_path, runfolder_name, novaseq_flag) + # capture stdout and stderr # NB all output from picard tool is in stderr (out, err) = self.execute_subprocess_command(cmd) @@ -1014,6 +1036,7 @@ def start_building_dx_run_cmds(self, list_of_processed_samples): congenica_upload = False joint_variant_calling = False # not currently in use rpkm_list = [] # list for panels needing RPKM analysis + onePGT_run = False # flag to skip processes not required for PGT runs # loop through samples for fastq in list_of_processed_samples: @@ -1067,7 +1090,12 @@ def start_building_dx_run_cmds(self, list_of_processed_samples): if self.panel_dictionary[panel]["mokaamp"]: commands_list.append(self.create_mokaamp_command(fastq, panel)) commands_list.append(self.add_to_depends_list(fastq)) - + + # if onePGT + if self.panel_dictionary[panel]["onePGT"]: + onePGT_run = True + self.move_onePGT_fastqs(fastq) + # if there is a congenica upload create the file which will be run manually, once QC is passed. 
if congenica_upload: self.build_congenica_command_file() @@ -1078,29 +1106,30 @@ def start_building_dx_run_cmds(self, list_of_processed_samples): ) # build run wide commands - if mokaonc_list: - commands_list.append(self.create_mokaonc_command(mokaonc_list)) - if joint_variant_calling: - commands_list.append(self.create_joint_variant_calling_command()) - if rpkm_list: - # Create a set of RPKM numbers for one command per panel - # pass this list into function which takes into account panels which are to be analysed - # together and returns a "cleaned_list" - for rpkm in self.prepare_rpkm_list(set(rpkm_list)): - commands_list.append(self.create_rpkm_command(rpkm)) - if peddy: - # TODO if custom panels and WES done together currently no way - # to stop custom panels being analysed by peddy - may cause problems - commands_list.append(self.run_peddy_command()) - # add to depends list so multiqc doesn't start until peddy finishes - # add_to_depends_list requires a string to determine if it's a negative control and shouldn't be added to depends on string. 
- # pass "peddy" to ensure it isn't skipped - commands_list.append(self.add_to_depends_list("peddy")) - # multiqc commands - commands_list.append(self.create_multiqc_command()) - commands_list.append(self.create_upload_multiqc_command()) - # smartsheet - commands_list.append(self.create_smartsheet_command()) + if not onePGT_run: + if mokaonc_list: + commands_list.append(self.create_mokaonc_command(mokaonc_list)) + if joint_variant_calling: + commands_list.append(self.create_joint_variant_calling_command()) + if rpkm_list: + # Create a set of RPKM numbers for one command per panel + # pass this list into function which takes into account panels which are to be analysed + # together and returns a "cleaned_list" + for rpkm in self.prepare_rpkm_list(set(rpkm_list)): + commands_list.append(self.create_rpkm_command(rpkm)) + if peddy: + # TODO if custom panels and WES done together currently no way + # to stop custom panels being analysed by peddy - may cause problems + commands_list.append(self.run_peddy_command()) + # add to depends list so multiqc doesn't start until peddy finishes + # add_to_depends_list requires a string to determine if it's a negative control and shouldn't be added to depends on string. 
+ # pass "peddy" to ensure it isn't skipped + commands_list.append(self.add_to_depends_list("peddy")) + # multiqc commands + commands_list.append(self.create_multiqc_command()) + commands_list.append(self.create_upload_multiqc_command()) + # smartsheet + commands_list.append(self.create_smartsheet_command()) return commands_list def create_mokawes_command(self, fastq, pannumber): @@ -1405,6 +1434,57 @@ def create_joint_variant_calling_command(self): # TODO: Implement joint-variant calling command for peddy raise NotImplementedError + def move_onePGT_fastqs(self, fastq): + """ + Input: + R1 fastq file name + This function checks for the filesize of a fastq file - a warning is sent if greater than size defined in config (currently 5GB) and the file is not moved + A rsync command is then issued to copy the fastq to the desired agilent upload folder + The stderr is sent to check_for_rsync_errors() to check for an expected word and a further alert is sent if "fail" or "error" are in stderr + Returns: + None + """ + # create a list of read1 and read2 fastqs + fastq_list=[fastq, fastq.replace("_R1_", "_R2_")] + for fastq_file in fastq_list: + self.loggers.script.info( + "UA_pass 'assessing OnePGT fastq file {}'".format(fastq_file) + ) + # test size of fastq + filesize = os.path.getsize(os.path.join(self.runfolder_obj.fastq_folder_path,fastq_file)) + if int(filesize) > config.max_filesize_in_bytes: + self.loggers.script.error( + "UA_fail 'fastq filesize check fail. {} is greater than {}. 
File has not been moved'".format( + fastq_file, config.max_filesize_in_GB + )) + else: + self.loggers.script.info("UA_pass 'fastq filesize check pass'") + # write rsync command to move fastq to agilent folder -v outputs in verbose mode + # use tee to write to file and stdout + cmd = "rsync -v {} {} | tee -a {}".format(os.path.join(self.runfolder_obj.fastq_folder_path,fastq_file), config.agilent_upload_folder, os.path.join(self.runfolder_obj.runfolderpath,self.runfolder_obj.runfolder_name+"_"+config.rsync_logfile)) + # run the command + out, err = self.execute_subprocess_command(cmd) + # pass stderr to function to look for errors + if not self.check_for_rsync_errors(err): + self.loggers.script.error( + "UA_fail 'onePGT fastq move via rsync failed for {}'".format(fastq_file) + ) + + def check_for_rsync_errors(self,stderr): + """ + Input: + stderr + rsync errors will be reported to stderr, not stdout + example error: + rsync: link_stat "/media/data3/share/testing/999999_NB552085_0077_AHYNCMAFXY/Data/Intensities/BaseCalls/NGS999999_01_242050_JB_U_VCP2R207StG_Pan4042_S1_R1_001.fastdxcq.gz" failed: No such file or directory (2) + rsync error: some files/attrs were not transferred (see previous errors) (code 23) at main.c(1183) [sender=3.1.0]) + Returns: + Boolean True, unless the strings "fail" or "error" are seen. 
+ """ + if "error" in stderr or "fail" in stderr: + return False + return True + def run_congenica_command(self, fastq, pannumber): """ Input = R1 fastq file name and pan number for a single sample @@ -1727,7 +1807,7 @@ def write_opms_queries_mokapipe(self, list_of_processed_samples): query = "insert into NGSCustomRuns(DNAnumber,PipelineVersion, RunID) values ('{}','{}','{}')" # if the pan number was processed using mokapipe and congenica, add the query to list of queries, capturing the DNA number from the fastq name if self.panel_dictionary[pannumber]["mokapipe"] and self.panel_dictionary[pannumber]["congenica_upload"]: - queries.append(query.format(str(fastq.split("_")[2]), config.mokapipe_congenica_pipeline_ID,self.runfolder_obj.runfolder_name)) + queries.append(query.format(str(fastq.split("_")[2]), config.mokapipe_congenica_pipeline_ID, self.runfolder_obj.runfolder_name)) if queries: # add workflow to sql dictionary @@ -1739,11 +1819,12 @@ def write_opms_queries_mokawes(self, list_of_processed_samples): """ Input = list of fastqs to be processed All samples processed using MokaWES are recorded in moka using a single update query. - If MokaWES samples - Function populates a dictionary of sample counts, and a query (str) to + If MokaWES samples - Function populates a dictionary of sample counts, query (str) and list of samplenames to be added to global dictionary. 
- Returns = dictionary or None + Returns = dictionary or None """ dnanumbers = [] + samplenames = [] # add workflow to sql dictionary for fastq in list_of_processed_samples: # take read one @@ -1754,6 +1835,8 @@ def write_opms_queries_mokawes(self, list_of_processed_samples): # capturing the DNA number from the fastq name if self.panel_dictionary[pannumber]["mokawes"]: dnanumbers.append(str(fastq.split("_")[2])) + # call function to build nexus fastq paths - returns tuple for read1 and read2 and samplename + samplenames.append(self.nexus_fastq_paths(fastq)[2]) if dnanumbers: return { "count": len(dnanumbers), @@ -1767,6 +1850,7 @@ def write_opms_queries_mokawes(self, list_of_processed_samples): + "') and StatusID = " + config.mokastat_nextsq_ID ], + "samplename_email": samplenames } else: return None @@ -1797,8 +1881,8 @@ def write_opms_queries_oncology(self, list_of_processed_samples): if "NTCcon" in fastq: id2 = "NULL" # define query with placeholders - query = "insert into NGSOncologyAudit(SampleID1,SampleID2,RunID,PipelineVersion,ngspanelid) values ('{}','{}','{}','{}','{}')" - + query = "insert into NGSOncologyAudit(SampleID1,SampleID2,RunID,PipelineVersion,ngspanelid) values ('{}','{}','{}','{}','{}')" + # for mokaamp and mokaonc if relevant build the query, populating the placeholders. 
# add the name of the workflow to the list of workflows if self.panel_dictionary[pannumber]["mokaamp"]: @@ -1888,6 +1972,21 @@ def send_opms_queries(self): # send email self.send_an_email(config.you, email_subject, email_message, email_priority) + if self.sql_queries["mokawes"]: + # send email to WES team to help IR upload + email_subject = ( + "MOKA ALERT : Started pipeline for " + self.runfolder_obj.runfolder_name + ) + email_message = ( + self.runfolder_obj.runfolder_name + + " being processed using " + + config.mokawes_path.split("/")[-1] + + "\nThe following samples are being processed:\n" + + "\n".join(self.sql_queries["mokawes"]["samplename_email"]) + ) + self.send_an_email(config.WES_sample_name_email_list, email_subject, email_message, email_priority) + # self.send_an_email(config.wes_email_address, email_subject, email_message, email_priority) + def upload_rest_of_runfolder(self): """ Input = None