Skip to content

Commit

Permalink
Merge pull request #353 from moka-guys/v41.0.0
Browse files Browse the repository at this point in the history
V41.0.0
  • Loading branch information
Aled Jones authored Sep 13, 2021
2 parents 004a1ca + b16aa39 commit 5c73003
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 82 deletions.
53 changes: 20 additions & 33 deletions automate_demultiplex_config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""
Automate demultiplex configuration.
The variables defined in this module are required by the "demultiplex.py" and
"DNANexus_upload_agent.py" scripts.
The variables defined in this module are required by the "demultiplex.py",
"upload_and_setoff_workflows.py" and "decision_support_tool_inputs.py" scripts.
"""

import os
Expand Down Expand Up @@ -114,10 +114,8 @@
# Current MokaWES ID
mokawes_pipeline_ID = "4318"
# MokaAMP ID
mokaamp_pipeline_ID = "4725"
# MokaONC ID
mokaonc_pipeline_ID = "4532"
# MokaAMP ID
mokaamp_pipeline_ID = "4816"
# Archer ID
archerDx_pipeline_ID = "4562"
# SNP Genotyping ID
snp_genotyping_pipeline_ID = "4480"
Expand Down Expand Up @@ -147,10 +145,8 @@
# path to the WES workflow in the app project
mokawes_path = "Workflows/MokaWES_v1.8"

# path to the oncology workflow in the app project
mokaonc_path = "Workflows/Mokaonc_v1.6"
# path to mokaamp
mokaamp_path = "Workflows/MokaAMP_v1.7"
mokaamp_path = "Workflows/MokaAMP_v2.1"
# path to mokacan
mokacan_path = "Workflows/MokaCAN_v1.0"
#path to snp_genotyping
Expand Down Expand Up @@ -221,18 +217,13 @@
#bcftools input
snp_bcftools_input = " -istage-FvGkxzj02Bk06Y687Xk8jJp0.in"

# MokaOnc amplivar fastq input
mokaonc_fq_input = " -istage-F7kPz6Q0vpxb0YpjBgQx5f8v.fastqs="
# ingenuity app input for amplivar workflow
mokaonc_ingenuity = " -istage-F5k1Qyj0jy1VKJb2KYqq7fxG.email="

# MokaAMP - stages that may change between samples/panels
# MokaAMP - stages that may change between samples/panels
mokaamp_fastq_R1_stage = " -istage-FPzGj780jy1g3p1F4F8z4J7V.reads_fastqgz="
mokaamp_fastq_R2_stage = " -istage-FPzGj780jy1g3p1F4F8z4J7V.reads2_fastqgz="
mokaamp_bwa_rg_sample = " -istage-FPzGj780jy1g3p1F4F8z4J7V.read_group_sample="
mokaamp_mokapicard_bed_stage = " -istage-FPzGjV80jy1x97jg607Fg22b.vendor_exome_bedfile="
mokaamp_mokapicard_capturetype_stage = " -istage-FPzGjV80jy1x97jg607Fg22b.Capture_panel="
mokaamp_bamclipper_BEDPE_stage = " -istage-FPzGjJQ0jy1fF6505zFP6zz9.primers="
mokaamp_ampliconfilter_BEDPE_stage = " -istage-FPzGjJQ0jy1fF6505zFP6zz9.BEDPE="
mokaamp_chanjo_cov_level_stage = " -istage-FPzGjfQ0jy1y01vG60K22qG1.coverage_level="
mokaamp_sambamba_bed_stage = " -istage-FPzGjfQ0jy1y01vG60K22qG1.sambamba_bed="
mokaamp_vardict_bed_stage = " -istage-G0vKZk80GfYkQx86PJGGjz9Y.bedfile="
Expand Down Expand Up @@ -324,8 +315,8 @@

# =====List of all panel numbers=====
panel_list = [
"Pan1190", # swift EGFR
"Pan2684", # swift 57
"Pan4081", # swift EGFR
"Pan4082", # swift 57
"Pan2835", # twist WES
"Pan4042", # STG VCP2 BRCA
"Pan4043", # STG VCP3
Expand Down Expand Up @@ -372,8 +363,8 @@
WES_panel_lists = ["Pan2835","Pan3174"]
SNP_panel_lists = ["Pan4009"]
archer_panel_list = ["Pan4396"]
swift_57G_panel_list = ["Pan2684"]
swift_egfr_panel_list = ["Pan1190"]
swift_57G_panel_list = ["Pan4082"]
swift_egfr_panel_list = ["Pan4081"]
mokacan_panel_list = ["Pan4573","Pan4574"]

default_panel_properties = {
Expand All @@ -386,7 +377,6 @@
"joint_variant_calling": False,
"mokaamp": False,
"capture_type": "Hybridisation", # "Amplicon" or "Hybridisation"
"mokaonc": False,
"mokacan": False,
"snp_genotyping": False,
"mokapipe": False,
Expand Down Expand Up @@ -432,26 +422,23 @@
"sambamba_bedfile": "Pan493dataSambamba.bed",
"peddy": True,
},
"Pan1190": { # EGFR SWIFT Panel
"oncology": True,
"mokaonc": True,
"Pan4081": { # EGFR SWIFT Panel
"mokaamp": True,
"oncology": True,
"capture_type": "Amplicon",
"clinical_coverage_depth": 600,
"clinical_coverage_depth": 600, # only found in mokaamp command
"multiqc_coverage_level": 100,
"mokaamp_bed_PE_input":"Pan3638_PE.bed",
"mokaamp_variant_calling_bed":"Pan3638_flat.bed",
"hsmetrics_bedfile": "Pan3638.bed",
"sambamba_bedfile": "Pan3638Sambamba.bed",
"destination_command": "MokaAMP_EGFR_trial"
},
"Pan2684": { # 57G SWIFT panel
"RPKM_bedfile_pan_number": None,
"hsmetrics_bedfile": "Pan4081.bed",
"sambamba_bedfile": "Pan4081Sambamba.bed",
},
"Pan4082": { # 57G SWIFT panel
"mokaamp": True,
"oncology": True,
"capture_type": "Amplicon",
"clinical_coverage_depth": 600, # only found in mokaamp command
"multiqc_coverage_level": 100,
"hsmetrics_bedfile": "Pan4082.bed",
"sambamba_bedfile": "Pan4082Sambamba.bed",
},
"Pan4044": { # VCP1 STG
"mokapipe": True,
Expand Down
52 changes: 3 additions & 49 deletions upload_and_setoff_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ def __init__(self, runfolder, now, debug_mode=False):
)
self.smartsheet_update_command = "dx run " + config.app_project + config.smartsheet_path
self.RPKM_command = "dx run " + config.app_project + config.RPKM_path + " --instance-type mem1_ssd1_x8"
self.mokaonc_command = "jobid=$(dx run " + config.app_project + config.mokaonc_path + " -y "
self.mokaamp_command = (
"jobid=$(dx run " + config.app_project + config.mokaamp_path + " -y --name "
)
Expand Down Expand Up @@ -1026,7 +1025,7 @@ def nexus_bedfiles(self, pannumber):
else:
bed_dict["variant_calling_bedfile"] = None

# paired end BED file used by primer clipping tool
# paired end BED file used by primer clipping tool
if self.panel_dictionary[pannumber]["mokaamp_bed_PE_input"]:
bed_dict["mokaamp_bed_PE_input"] = (
config.app_project
Expand Down Expand Up @@ -1079,7 +1078,6 @@ def start_building_dx_run_cmds(self, list_of_processed_samples):
commands_list.append(self.source_command)

# lists/flags for run wide commands
mokaonc_list = []
peddy = False
congenica_upload = False
joint_variant_calling = False # not currently in use
Expand Down Expand Up @@ -1129,10 +1127,6 @@ def start_building_dx_run_cmds(self, list_of_processed_samples):
# add panel to RPKM list
if self.panel_dictionary[panel]["RPKM_bedfile_pan_number"]:
rpkm_list.append(panel)

# If panel is to be processed using MokaONC
if self.panel_dictionary[panel]["mokaonc"]:
mokaonc_list.append(fastq)

# If panel is to be processed using MokaAMP
if self.panel_dictionary[panel]["mokaamp"]:
Expand Down Expand Up @@ -1173,8 +1167,6 @@ def start_building_dx_run_cmds(self, list_of_processed_samples):
)

# build run wide commands
if mokaonc_list:
commands_list.append(self.create_mokaonc_command(mokaonc_list))
if joint_variant_calling:
commands_list.append(self.create_joint_variant_calling_command())
if rpkm_list:
Expand Down Expand Up @@ -1374,35 +1366,6 @@ def create_mokapipe_command(self, fastq, pannumber):

return dx_command

def create_mokaonc_command(self, mokaonc_list):
    """
    Build the single run-wide dx run command for the MokaONC workflow.

    Input = list of read 1 fastqs (one entry per sample).
    MokaONC only supports one panel (Pan1190), hence some values are hard-coded here.
    This pipeline is soon to be discontinued.
    Returns = one dx run command covering all samples (string)
    """
    # begin with the dx run prefix (this is the part that captures the job id)
    command_parts = [self.mokaonc_command]

    # every sample contributes its read 1 and read 2 fastq as a separate fastq input
    for read1_fastq in mokaonc_list:
        # nexus_fastq_paths result is indexed: [0] is read 1, [1] is read 2
        read_paths = self.nexus_fastq_paths(read1_fastq)
        command_parts.extend(
            [
                config.mokaonc_fq_input,
                read_paths[0],
                config.mokaonc_fq_input,
                read_paths[1],
            ]
        )

    # finish the command - NB the output folder name and stage id are hard-coded
    # because MokaONC only supports a single panel
    command_parts.extend(
        [
            self.dest,
            self.dest_cmd,
            "amplivar_output",
            " --stage-output-folder stage-G0KYx8Q0GfYvbVg49bYf9p9g ",
            self.dest_cmd,
            self.token,
        ]
    )

    return "".join(command_parts)

def build_iva_input_command(self):
"""
Inputs = None
Expand Down Expand Up @@ -1460,12 +1423,6 @@ def create_mokaamp_command(self, fastq, pannumber):
# build nexus fastq paths - returns tuple for read1 and read2 and dictionary for bed files
fastqs = self.nexus_fastq_paths(fastq)
bedfiles = self.nexus_bedfiles(pannumber)

# we may want to run this pipeline along side mokaONC (which should be used to analyse).
# To avoid confusion we need to change the destination to ensure files produced by this pipeline are not used for analysis
dest_cmd = self.dest_cmd
if self.panel_dictionary[pannumber]["destination_command"]:
dest_cmd += self.panel_dictionary[pannumber]["destination_command"]

# create the MokaAMP dx command
dx_command_list = [
Expand All @@ -1481,7 +1438,7 @@ def create_mokaamp_command(self, fastq, pannumber):
bedfiles["hsmetrics"],
config.mokaamp_mokapicard_capturetype_stage,
self.panel_dictionary[pannumber]["capture_type"],
config.mokaamp_bamclipper_BEDPE_stage,
config.mokaamp_ampliconfilter_BEDPE_stage,
bedfiles["mokaamp_bed_PE_input"],
config.mokaamp_chanjo_cov_level_stage,
self.panel_dictionary[pannumber]["clinical_coverage_depth"],
Expand All @@ -1504,7 +1461,7 @@ def create_mokaamp_command(self, fastq, pannumber):
config.mokaamp_vardict_reference_stage,
config.mokaamp_varscan_reference_stage,
self.dest,
dest_cmd,
self.dest_cmd,
self.token,
]

Expand Down Expand Up @@ -2121,9 +2078,6 @@ def write_opms_queries_oncology(self, list_of_processed_samples):
if self.panel_dictionary[pannumber]["mokaamp"]:
queries.append(query.format(id1, id2, self.runfolder_obj.runfolder_name, config.mokaamp_pipeline_ID, pannumber_no_pan))
workflows.append(config.mokaamp_path.split("/")[-1])
if self.panel_dictionary[pannumber]["mokaonc"]:
queries.append(query.format(id1, id2, self.runfolder_obj.runfolder_name, config.mokaonc_pipeline_ID, pannumber_no_pan))
workflows.append(config.mokaonc_path.split("/")[-1])
if self.panel_dictionary[pannumber]["archerdx"]:
queries.append(query.format(id1, id2, self.runfolder_obj.runfolder_name, config.archerDx_pipeline_ID, pannumber_no_pan))
workflows.append(config.fastqc_app.split("/")[-1])
Expand Down

0 comments on commit 5c73003

Please sign in to comment.