Skip to content

Commit

Permalink
minor updates
Browse files Browse the repository at this point in the history
  • Loading branch information
sigven committed Feb 3, 2024
1 parent 5a94eb3 commit f8d0606
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 24 deletions.
6 changes: 4 additions & 2 deletions pcgr/cpsr.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def run_cpsr(conf_options, cpsr_paths):
else:
logger.info(f"Diagnostic-grade genes in virtual panels (GE PanelApp): " + \
f"{'ON' if conf_options['gene_panel']['diagnostic_grade_only'] else 'OFF'}")
logger.info(f"Include incidental findings (ACMG recommended list v3.1): " + \
logger.info(f"Include incidental findings (ACMG recommended list v3.2): " + \
f"{'ON' if conf_options['variant_classification']['secondary_findings'] else 'OFF'}")
logger.info(f"Include low to moderate cancer risk variants from genome-wide association studies: " + \
f"{'ON' if conf_options['variant_classification']['gwas_findings'] else 'OFF'}")
Expand Down Expand Up @@ -204,6 +204,7 @@ def run_cpsr(conf_options, cpsr_paths):
output_vcf = vep_vcf)

logger = getlogger('cpsr-vep')
#print(str(vep_command["main"]))

logger.info((
f"CPSR - STEP 1: Basic variant annotation with Variant Effect Predictor (version {pcgr_vars.VEP_VERSION}, "
Expand All @@ -223,6 +224,7 @@ def run_cpsr(conf_options, cpsr_paths):
check_subprocess(logger, vep_command["tabix"], debug)
logger.info("Finished cpsr-vep")
print('----')
#exit(0)

## CPSR|vcfanno - run vcfanno on query VCF with a number of relevant annotated VCFs
logger = getlogger('cpsr-vcfanno')
Expand Down Expand Up @@ -289,7 +291,7 @@ def run_cpsr(conf_options, cpsr_paths):
outfile.write(yaml.dump(yaml_data))
outfile.close()

variant_set.to_csv(output_pass_tsv_gz, sep="\t", compression="gzip", index=False)
variant_set.fillna('.').to_csv(output_pass_tsv_gz, sep="\t", compression="gzip", index=False)
if not debug:
remove_file(output_pass_vcf2tsv_gz)

Expand Down
6 changes: 3 additions & 3 deletions pcgr/pcgr_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pcgr._version import __version__

PCGR_VERSION = __version__
DB_VERSION = '20231212'
DB_VERSION = '20240203'

## MISCELLANEOUS
NCBI_BUILD_MAF = 'GRCh38'
Expand All @@ -12,13 +12,13 @@
RECOMMENDED_N_MUT_SIGNATURE = 200

## GENCODE
GENCODE_VERSION = {'grch38': 44,'grch37': 19}
GENCODE_VERSION = {'grch38': 45,'grch37': 19}

## vcfanno
VCFANNO_MAX_PROC = 15

## VEP settings/versions
VEP_VERSION = '110'
VEP_VERSION = '111'
VEP_ASSEMBLY = {'grch38': 'GRCh38','grch37': 'GRCh37'}
VEP_MIN_FORKS = 1
VEP_MAX_FORKS = 8
Expand Down
2 changes: 1 addition & 1 deletion pcgrr/R/input_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ load_dna_variants <- function(
.data$EVIDENCE_ID, sep=";"
) |>
dplyr::group_by(
.data$EVIDENCE_ID
EVIDENCE_ID
) |>
dplyr::summarise(
CITATION = paste(
Expand Down
92 changes: 78 additions & 14 deletions pcgrr/R/reference_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ load_reference_data <- function(
pcgr_ref_data[["gene"]][["panel"]] <- data.frame()
pcgr_ref_data[["gene"]][["cpg"]] <- data.frame()
pcgr_ref_data[['gene']][['gene_xref']] <- data.frame()
pcgr_ref_data[['gene']][['transcript_xref']] <- data.frame()

cpg_tsv_fname <- file.path(
pcgr_db_assembly_dir, "gene", "tsv",
Expand Down Expand Up @@ -156,6 +157,23 @@ load_reference_data <- function(
"gene_transcript_xref.tsv.gz"
)
check_file_exists(gene_xref_tsv_fname)

pcgr_ref_data[['gene']][['transcript_xref']] <- as.data.frame(
readr::read_tsv(gene_xref_tsv_fname, show_col_types = F)) |>
dplyr::select(
c("chrom",
"ensembl_gene_id",
"ensembl_transcript_id",
"gencode_transcript_biotype",
"gene_biotype"
)
) |>
dplyr::distinct()

colnames(pcgr_ref_data[['gene']][['transcript_xref']]) <-
toupper(colnames(pcgr_ref_data[['gene']][['transcript_xref']]))


pcgr_ref_data[['gene']][['gene_xref']] <- as.data.frame(
readr::read_tsv(gene_xref_tsv_fname, show_col_types = F)) |>
dplyr::select(
Expand All @@ -175,7 +193,7 @@ load_reference_data <- function(
"cancergene_evidence")
) |>
dplyr::rename(
genename = .data$name
genename = name
) |>
dplyr::mutate(
entrezgene = as.character(.data$entrezgene)
Expand Down Expand Up @@ -251,6 +269,27 @@ load_reference_data <- function(
toupper(colnames(pcgr_ref_data[['variant']][['gwas']]))


pcgr_ref_data[['variant']][['varstats']] <- list()
## Get variant statistics
for(vardb in c('clinvar','gwas','tcga',
'gnomad_non_cancer','dbmts',
'dbnsfp')){
varstats_fname <-
file.path(
pcgr_db_assembly_dir, "variant", "vcf", vardb,
paste0(vardb,".vcf_varstats.tsv")
)

if(file.exists(varstats_fname)){
pcgr_ref_data[['variant']][['varstats']][[vardb]] <-
as.data.frame(
readr::read_tsv(
varstats_fname, show_col_types = F))
}

}



## 3. Phenotype ontologies

Expand Down Expand Up @@ -294,54 +333,79 @@ load_reference_data <- function(
file.path(
pcgr_db_assembly_dir, "misc", "other",
"msi_classification",
"msi_classification.rds"
"tcga_msi_classifier.rds"
)
check_file_exists(msi_model_rds)
pcgr_ref_data[['msi']] <-
readRDS(msi_model_rds)


pcgr_ref_data[['misc']] <- list()
## 5. Miscellaneous
for(elem in c('tmb',
'mutational_signature',
'pathway')){
'pathway',
'hotspot',
'protein_domain')){

fname_misc <- file.path(
pcgr_db_assembly_dir, "misc", "tsv", elem,
paste0(elem,".tsv.gz")
)

# if(elem == 'hotspot'){
# fname_misc <- file.path(
# pcgr_db_assembly_dir, "misc", "tsv", elem,
# paste0(elem,".tsv.gz")
# )
# }

check_file_exists(fname_misc)
pcgr_ref_data[[elem]] <- as.data.frame(
pcgr_ref_data[['misc']][[elem]] <- as.data.frame(
readr::read_tsv(
fname_misc, show_col_types = F,
na = ".")
)
colnames(pcgr_ref_data[[elem]]) <-
toupper(colnames(pcgr_ref_data[[elem]]))
colnames(pcgr_ref_data[['misc']][[elem]]) <-
toupper(colnames(pcgr_ref_data[['misc']][[elem]]))

}

tmp = pcgr_ref_data[['pathway']]
pcgr_ref_data[['pathway']] <- list()
pcgr_ref_data[['pathway']][['long']] <- tmp
pcgr_ref_data[['pathway']][['wide']] <- as.data.frame(
tmp = pcgr_ref_data[['misc']][['pathway']]
pcgr_ref_data[['misc']][['pathway']] <- list()
pcgr_ref_data[['misc']][['pathway']][['long']] <- tmp
pcgr_ref_data[['misc']][['pathway']][['wide']] <- as.data.frame(
tmp |>
dplyr::group_by(.data$GENE_ID) |>
dplyr::summarise(LINK = paste(.data$URL_HTML, collapse = ", ")))


## 6. Drugs

pcgr_ref_data[['drug']] <- list()
drug_tsv_fname <-
file.path(
pcgr_db_assembly_dir, "drug",
"tsv", "drug.tsv.gz"
"tsv", "drug_targeted.tsv.gz"
)
check_file_exists(drug_tsv_fname)
pcgr_ref_data[['drug']] <- as.data.frame(
pcgr_ref_data[['drug']][['targeted']] <- as.data.frame(
readr::read_tsv(drug_tsv_fname, show_col_types = F, na = ".")
)
colnames(pcgr_ref_data[['drug']]) <-
toupper(colnames(pcgr_ref_data[['drug']]))
colnames(pcgr_ref_data[['drug']][['targeted']]) <-
toupper(colnames(pcgr_ref_data[['drug']][['targeted']]))

drug_all_tsv_fname <-
file.path(
pcgr_db_assembly_dir, "drug",
"tsv", "drug_all.tsv.gz"
)
check_file_exists(drug_all_tsv_fname)
pcgr_ref_data[['drug']][['all']] <- as.data.frame(
readr::read_tsv(drug_all_tsv_fname, show_col_types = F, na = ".")
)
colnames(pcgr_ref_data[['drug']][['all']]) <-
toupper(colnames(pcgr_ref_data[['drug']][['all']]))

## 7. Biomarkers
pcgr_ref_data[['biomarker']] <- list()
Expand Down
12 changes: 8 additions & 4 deletions pcgrr/data-raw/data-raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,14 @@ for (c in c("pathogenicity", "clinical_evidence", "tier",
}
if (c == "clinical_evidence") {
color_palette[[c]][["levels"]] <-
c("A: Validated", "A: FDA/NCCN/ELN guidelines",
"B: Clinical evidence", "B1: Clinical evidence: late trials",
"B2: Clinical evidence: early trials", "C: Case study",
"D: Preclinical evidence", "E: Indirect evidence")
c("A: Validated",
"A: FDA/NCCN/ELN guidelines",
"B: Clinical evidence",
"B1: Clinical evidence: late trials",
"B2: Clinical evidence: early trials",
"C: Case study",
"D: Preclinical evidence",
"E: Indirect evidence")
color_palette[[c]][["values"]] <-
c("#009E73", "#009E73", "#56B4E9", "#56B4E9",
"#56B4E9", "#0072B2", "#E69F00", "#F0E442")
Expand Down

0 comments on commit f8d0606

Please sign in to comment.