Skip to content

Commit

Permalink
quarto + excel 2024
Browse files Browse the repository at this point in the history
  • Loading branch information
sigven committed Apr 22, 2024
1 parent 52279c9 commit 8d291a6
Show file tree
Hide file tree
Showing 259 changed files with 37,310 additions and 3,007 deletions.
395 changes: 0 additions & 395 deletions examples/tumor_sample.BRCA.cna.tsv

This file was deleted.

Binary file removed examples/tumor_sample.BRCA.grch37.vcf.gz
Binary file not shown.
Binary file removed examples/tumor_sample.BRCA.grch37.vcf.gz.tbi
Binary file not shown.
Binary file removed examples/tumor_sample.BRCA.grch38.vcf.gz
Binary file not shown.
Binary file removed examples/tumor_sample.BRCA.grch38.vcf.gz.tbi
Binary file not shown.
265 changes: 0 additions & 265 deletions examples/tumor_sample.COAD.cna.tsv

This file was deleted.

Binary file removed examples/tumor_sample.COAD.grch37.vcf.gz
Binary file not shown.
Binary file removed examples/tumor_sample.COAD.grch37.vcf.gz.tbi
Binary file not shown.
Binary file removed examples/tumor_sample.COAD.grch38.vcf.gz
Binary file not shown.
Binary file removed examples/tumor_sample.COAD.grch38.vcf.gz.tbi
Binary file not shown.
43 changes: 26 additions & 17 deletions pcgr/arg_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,11 @@ def verify_args(arg_dict):

if arg_dict['tumor_only'] is True:

if arg_dict['control_dp_tag'] is not None:
if arg_dict['control_dp_tag'] is not None and arg_dict['control_dp_tag'] != "_NA_":
err_msg = f"Option '--tumor_only' does not allow '--control_dp_tag' option to be set ({arg_dict['control_dp_tag']})"
error_message(err_msg, logger)

if arg_dict['control_af_tag'] is not None:
if arg_dict['control_af_tag'] is not None and arg_dict['control_af_tag'] != "_NA_":
err_msg = f"Option '--tumor_only' does not allow '--control_af_tag' option to be set ({arg_dict['control_af_tag']})"
error_message(err_msg, logger)

Expand Down Expand Up @@ -209,6 +209,12 @@ def define_output_files(arg_dict, cpsr = False):
f"{output_prefix}.cna_segments.tsv.gz"
output_expression = \
f"{output_prefix}.expression.tsv.gz"
output_expression_outliers = \
f"{output_prefix}.expression_outliers.tsv.gz"
output_expression_similarity = \
f"{output_prefix}.expression_similarity.tsv.gz"
output_csq_expression = \
f"{output_prefix}.csq_expression.tsv.gz"

# if annotated output vcf exist and overwrite not set
if os.path.exists(output_vcf) and arg_dict["force_overwrite"] is False:
Expand Down Expand Up @@ -243,6 +249,9 @@ def define_output_files(arg_dict, cpsr = False):
if not cpsr:
output_data['cna'] = output_cna
output_data['expression'] = output_expression
output_data['csq_expression'] = output_csq_expression
output_data['expression_outliers'] = output_expression_outliers
output_data['expression_similarity'] = output_expression_similarity

return output_data

Expand All @@ -255,7 +264,7 @@ def verify_input_files(arg_dict):
input_vcf_dir = 'NA'
input_cna_dir = 'NA'
input_rna_fusion_dir = 'NA'
input_cpsr_report_dir = 'NA'
input_germline_dir = 'NA'
input_rna_expression_dir = 'NA'
pon_vcf_dir = 'NA'
db_dir = 'NA'
Expand All @@ -265,7 +274,7 @@ def verify_input_files(arg_dict):
input_cna_basename = 'NA'
input_rna_fusion_basename = 'NA'
input_rna_expression_basename = 'NA'
input_cpsr_report_basename = 'NA'
input_germline_basename = 'NA'
arg_dict['rna_fusion_tumor'] = None

# create output folder (if not already exists)
Expand Down Expand Up @@ -354,20 +363,20 @@ def verify_input_files(arg_dict):
input_rna_expression_dir = os.path.dirname(
os.path.abspath(arg_dict["input_rna_exp"]))

# check if input rna fusion variants exist
if not arg_dict["cpsr_report"] is None:
if not os.path.exists(os.path.abspath(arg_dict["cpsr_report"])):
# check if input germline calls (CPSR) exist
if not arg_dict["input_germline"] is None:
if not os.path.exists(os.path.abspath(arg_dict["input_germline"])):
err_msg = "Input file (" + \
str(arg_dict["cpsr_report"]) + ") does not exist"
str(arg_dict["input_germline"]) + ") does not exist"
error_message(err_msg, logger)
if not (os.path.abspath(arg_dict["cpsr_report"]).endswith(".json.gz")):
err_msg = "CPSR report file (" + os.path.abspath(
arg_dict["cpsr_report"]) + ") does not have the correct file extension (.json.gz)"
if not (os.path.abspath(arg_dict["input_germline"]).endswith(".tsv.gz")):
err_msg = "File with CPSR-classified germline calls (" + os.path.abspath(
arg_dict["input_germline"]) + ") does not have the correct file extension (.tsv.gz)"
error_message(err_msg, logger)
input_cpsr_report_basename = os.path.basename(
str(arg_dict["cpsr_report"]))
input_cpsr_report_dir = os.path.dirname(
os.path.abspath(arg_dict["cpsr_report"]))
input_germline_basename = os.path.basename(
str(arg_dict["input_germline"]))
input_germline_dir = os.path.dirname(
os.path.abspath(arg_dict["input_germline"]))

vep_dir = verify_vep_cache(arg_dict, logger)
refdata_assembly_dir = verify_refdata(arg_dict, logger, cpsr = True)
Expand All @@ -378,8 +387,8 @@ def verify_input_files(arg_dict):
"cna_dir": input_cna_dir,
"rna_fusion_dir": input_rna_fusion_dir,
"rna_expression_dir": input_rna_expression_dir,
"cpsr_report_dir": input_cpsr_report_dir,
"cpsr_report_basename": input_cpsr_report_basename,
"germline_dir": input_germline_dir,
"germline_basename": input_germline_basename,
"pon_vcf_dir": pon_vcf_dir,
"refdata_assembly_dir": refdata_assembly_dir,
"vep_dir": vep_dir,
Expand Down
29 changes: 14 additions & 15 deletions pcgr/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,6 @@ def create_config(arg_dict, workflow = "PCGR"):
'vep_gencode_basic': int(arg_dict['vep_gencode_basic'])
}

#conf_options['visual_reporting'] = {
# 'visual_theme': str(arg_dict['report_theme']),
# 'nonfloating_toc': int(arg_dict['report_nonfloating_toc'])
#}

conf_options['other'] = {
'vcfanno_n_proc': int(arg_dict['vcfanno_n_proc']),
'no_reporting': int(arg_dict['no_reporting']),
Expand Down Expand Up @@ -82,15 +77,16 @@ def create_config(arg_dict, workflow = "PCGR"):
'n_copy_gain': int(arg_dict['n_copy_gain'])
}

conf_options['gene_expression'] = {}
conf_options['gene_expression']['similarity_analysis'] = int(arg_dict['expression_sim'])
conf_options['gene_expression']['similarity_db'] = {}
conf_options['expression'] = {}
conf_options['expression']['run'] = int(not arg_dict['input_rna_exp'] is None)
conf_options['expression']['similarity_analysis'] = int(arg_dict['expression_sim'])
conf_options['expression']['similarity_db'] = {}
for db in arg_dict['expression_sim_db'].split(','):
conf_options['gene_expression']['similarity_db'][db] = 1
conf_options['expression']['similarity_db'][db] = 1
if db == 'tcga':
conf_options['gene_expression']['similarity_db']['tcga'] = {}
for cohort in pcgr_vars.TCGA_COHORTS:
conf_options['gene_expression']['similarity_db']['tcga'][cohort] = 1
conf_options['expression']['similarity_db']['tcga'] = {}
for cohort in pcgr_vars.DISEASE_COHORTS:
conf_options['expression']['similarity_db']['tcga'][cohort] = 1

conf_options['somatic_snv'] = {}
conf_options['somatic_snv']['allelic_support'] = {
Expand Down Expand Up @@ -139,9 +135,12 @@ def create_config(arg_dict, workflow = "PCGR"):

conf_options['molecular_data']['fname_cna_tsv'] = "None"
conf_options['molecular_data']['fname_expression_tsv'] = "None"
conf_options['molecular_data']['fname_tmb'] = "None"
for source in ['tcga','treehouse','depmap']:
conf_options['molecular_data']['fname_expression_sim_' + source] = "None"
conf_options['molecular_data']['fname_expression_outliers_tsv'] = "None"
#conf_options['molecular_data']['fname_expression_csq_tsv'] = "None"
conf_options['molecular_data']['fname_expression_similarity_tsv'] = "None"
conf_options['molecular_data']['fname_tmb_tsv'] = "None"
#for source in ['tcga','treehouse','depmap']:
# conf_options['molecular_data']['fname_expression_sim_' + source] = "None"


if workflow == "CPSR":
Expand Down
2 changes: 1 addition & 1 deletion pcgr/cpsr.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def run_cpsr(conf_options, input_data, output_data):
variant_set = \
variant.append_annotations(
output_pass_vcf2tsv_gz, refdata_assembly_dir = input_data["refdata_assembly_dir"], logger = logger)
variant_set = variant.clean_annotations(variant_set, yaml_data, germline = True, logger = logger)
variant_set = variant.clean_annotations(variant_set, yaml_data, logger = logger)

## If no genotypes are available, set conf['sample_properties']['genotypes_available'] = 1
if {'GENOTYPE'}.issubset(variant_set.columns):
Expand Down
Loading

0 comments on commit 8d291a6

Please sign in to comment.