From 456792d6f7bc1d176ab066a7106c097c85a8e841 Mon Sep 17 00:00:00 2001 From: Nicole Zeltser Date: Wed, 25 Dec 2024 00:37:10 -0800 Subject: [PATCH 01/10] add INDED effect switch handling --- R/assess-strand-flip.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/R/assess-strand-flip.R b/R/assess-strand-flip.R index 0931898..e0fb4ac 100644 --- a/R/assess-strand-flip.R +++ b/R/assess-strand-flip.R @@ -216,6 +216,14 @@ assess.pgs.vcf.allele.match <- function( # identify insertion/deletion alleles in PGS if (nchar(current.pgs.ref.allele) > 1 || nchar(current.pgs.effect.allele) > 1) { pgs.indel <- TRUE; + if (effect.switch.candidate) { + # if an INDEL effect switch is detected, do not continue to strand flip assessment + # and return effect switch designation + flip.designation[i] <- 'effect_switch'; + flipped.effect.allele[i] <- current.pgs.effect.allele; + flipped.other.allele[i] <- current.pgs.ref.allele; + next; + } # no INDEL flipping supported, return either NA or the original allele warning('Mismatch detected in INDEL PGS allele. Skipping strand flip assessment.'); flip.designation[i] <- 'indel_mismatch'; @@ -234,6 +242,14 @@ assess.pgs.vcf.allele.match <- function( # identify insertion/deletion alleles in VCF if (nchar(current.vcf.ref.allele) > 1 || all(nchar(current.vcf.alt.allele.split) > 1)) { + if (effect.switch.candidate) { + # if an INDEL effect switch is detected, do not continue to strand flip assessment + # and return effect switch designation + flip.designation[i] <- 'effect_switch'; + flipped.effect.allele[i] <- current.pgs.effect.allele; + flipped.other.allele[i] <- current.pgs.ref.allele; + next; + } # no INDEL flipping supported, return either NA or the original allele warning('Mismatch detected in INDEL VCF allele. Skipping strand flip assessment.'); flip.designation[i] <- 'indel_mismatch'; From db6b1c96c075a671bbfa843ccbda783a12a3af77 Mon Sep 17 00:00:00 2001 From: Nicole Zeltser Date: Wed, 25 Dec 2024 00:39:41 -0800 Subject: [PATCH 02/10] add test case for INDEL effect switch --- tests/testthat/test-strand-flip-handling.R | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test-strand-flip-handling.R b/tests/testthat/test-strand-flip-handling.R index 876bc08..f7a6778 100644 --- a/tests/testthat/test-strand-flip-handling.R +++ b/tests/testthat/test-strand-flip-handling.R @@ -206,6 +206,12 @@ test_that( assess.pgs.vcf.allele.match('ATCG', 'A', 'G', 'A'), 'Mismatch detected in INDEL VCF allele. Skipping strand flip assessment.' ); + + # indel effect switch case + expect_silent( + assess.pgs.vcf.allele.match('A', 'ATCG', 'ATCG', 'A') + ); + # indel but everything matches expect_silent( assess.pgs.vcf.allele.match('ATCG', 'A', 'ATCG', 'A') @@ -214,26 +220,26 @@ test_that( # check indel handling when return.indels.as.missing == FALSE # VCF ALT allele is an INDEL test.leave.indels.vcf.alt <- assess.pgs.vcf.allele.match( - c('A', 'A'), - c('G', 'ATCG'), - c('A', 'A'), - c('G', 'G'), + c('A', 'A', 'A'), + c('G', 'ATCG', 'ATCG'), + c('A', 'A', 'ATCG'), + c('G', 'G', 'A'), return.indels.as.missing = FALSE ); expect_equal( test.leave.indels.vcf.alt$match.status, - c('default_match', 'indel_mismatch') + c('default_match', 'indel_mismatch', 'effect_switch') ); expect_equal( test.leave.indels.vcf.alt$new.pgs.effect.allele, - c('G', 'G') + c('G', 'G', 'A') ); expect_equal( test.leave.indels.vcf.alt$new.pgs.other.allele, - c('A', 'A') + c('A', 'A', 'ATCG') ); # VCF REF allele is an INDEL From e7af4530a287d42c85dd9448022de276e6da3421 Mon Sep 17 00:00:00 2001 From: Nicole Zeltser Date: Wed, 25 Dec 2024 17:11:14 -0800 Subject: [PATCH 03/10] handle hemizygous genotype cases --- R/calculate-dosage.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/calculate-dosage.R b/R/calculate-dosage.R index 0a0f6a6..9e2bd8d 100644 --- a/R/calculate-dosage.R +++ b/R/calculate-dosage.R @@ -2,7 +2,7 @@ #' @description Convert genotype calls in the form of witten out alleles (e.g. 'A/T') to dosages (0, 1, 2) based on provided risk alleles from a PGS. #' @param called.alleles A vector of genotypes in allelic notation separated by a slash or pipe. #' @param risk.alleles A vector of risk alleles from a polygenic score corresponding to each genotype (by locus) in called.alleles. -#' @return A vector of dosages corresponding to each genotype in called.alleles. +#' @return A vector of dosages corresponding to each genotype in called.alleles. Hemizygous genotypes (one allele e.g. 'A' are counted as 1). #' @examples #' called.alleles <- c('A/A', 'A/T', 'T/T'); #' risk.alleles <- c('T', 'T', 'T'); @@ -31,12 +31,16 @@ convert.alleles.to.pgs.dosage <- function(called.alleles, risk.alleles) { } else { # check that called.alleles is a vector of genotypes in allelic notation or '.' separated by a slash or pipe # "*" characters represent overlapping deletions from an upstream indel and are accepted VCF format - allowed.pattern <- '^((([A-Z]+|\\.|\\*)[/\\|]([A-Z]+|\\.|\\*))|\\.)$' # '|' are special chars in regular expressions + allowed.pattern <- '^((([A-Z]+|\\.|\\*)[/\\|]([A-Z]+|\\.|\\*))|\\.|[A-Z]+)$' # '|' are special chars in regular expressions passing.alleles <- grepl(allowed.pattern, called.alleles); passing.alleles[is.na(called.alleles)] <- TRUE; # NA allowed if (!all(passing.alleles)) { stop('unrecognized called.alleles format, must be capitalized letters, "." or "*" separated by a slash or pipe.'); } + # replace hemizygous genotypes with a placeholder for easier splitting + # index for non-NA alleles that are missing allele separators: + no.sep.index <- (!grepl('/|\\|', called.alleles) & !is.na(called.alleles) & called.alleles != '.'); + called.alleles[no.sep.index] <- paste0(called.alleles[no.sep.index], '/-'); split.alleles <- data.table::tstrsplit(called.alleles, split = c('/|\\|'), keep = c(1,2)); # '|' are special chars in regular expressions } names(split.alleles) <- c('called.allele.a', 'called.allele.b'); From d68a4ca7ec4349e048e0b6d966a35bad7b91db25 Mon Sep 17 00:00:00 2001 From: Nicole Zeltser Date: Wed, 25 Dec 2024 17:14:29 -0800 Subject: [PATCH 04/10] update dosage test cases --- tests/testthat/test-dosage-calculator.R | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-dosage-calculator.R b/tests/testthat/test-dosage-calculator.R index 4e6e68c..cabcc60 100644 --- a/tests/testthat/test-dosage-calculator.R +++ b/tests/testthat/test-dosage-calculator.R @@ -34,13 +34,6 @@ test_that( ); # check for correct called.alleles format - expect_error( - convert.alleles.to.pgs.dosage( - called.alleles = c('A/A', 'A'), - risk.alleles = c('A', 'T') - ), - 'unrecognized called.alleles format, must be capitalized letters, "." or "\\*" separated by a slash or pipe.' - ); expect_error( convert.alleles.to.pgs.dosage( called.alleles = c('A/A', 'A,'), @@ -101,8 +94,8 @@ test_that( # check that correct input is accepted expect_silent( convert.alleles.to.pgs.dosage( - called.alleles = c('A/A', 'A|T', 'TA/T', 'A/ATTTT', './.', '.', '*/T', 'T/*', '*/*'), - risk.alleles = c('A', 'T', 'A', 'T', 'A', 'T', 'A', 'T', 'A') + called.alleles = c('A/A', 'A|T', 'TA/T', 'A/ATTTT', './.', '.', '*/T', 'T/*', '*/*', 'A', 'T'), + risk.alleles = c('A', 'T', 'A', 'T', 'A', 'T', 'A', 'T', 'A', 'T', 'A') ) ); } @@ -132,6 +125,18 @@ test_that( } ); +test_that( + 'convert.alleles.to.pgs.dosage calculates dosage correctly from hemizygous genotypes', { + expect_equal( + convert.alleles.to.pgs.dosage( + called.alleles = c('A', 'T'), + risk.alleles = c('A', 'A') + ), + c(1, 0) + ); + } + ); + test_that( 'convert.alleles.to.pgs.dosage calculates dosage correctly from short indel alleles', { expect_equal( From 1a8c01322c29f4ebc9ab772ca9b0a213a5eaafb9 Mon Sep 17 00:00:00 2001 From: Nicole Zeltser Date: Tue, 18 Feb 2025 12:08:03 -0800 Subject: [PATCH 05/10] more careful data filtering for statistical analysis --- R/apply-pgs.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/apply-pgs.R b/R/apply-pgs.R index f4360f9..15d1675 100644 --- a/R/apply-pgs.R +++ b/R/apply-pgs.R @@ -561,9 +561,12 @@ apply.polygenic.score <- function( ### Begin Phenotype Analysis ### if (!is.null(phenotype.analysis.columns)) { + # post merge data selection + pgs.for.phenotype.stats <- pgs.output[ , missing.method.to.colname.ref[analysis.source.pgs]]; + regression.output <- run.pgs.regression( - pgs = pgs.for.stats, - phenotype.data = subset(phenotype.data, select = phenotype.analysis.columns) + pgs = pgs.for.phenotype.stats, + phenotype.data = subset(pgs.output, select = phenotype.analysis.columns) ); } ### End Phenotype Analysis ### From d86a4b8220164915e4f926df5a7b6c290e0be61e Mon Sep 17 00:00:00 2001 From: Nicole Zeltser Date: Tue, 18 Feb 2025 12:11:05 -0800 Subject: [PATCH 06/10] update continuous var conditions in variable classification --- R/run-pgs-statistics.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/run-pgs-statistics.R b/R/run-pgs-statistics.R index a5c2269..1b03786 100644 --- a/R/run-pgs-statistics.R +++ b/R/run-pgs-statistics.R @@ -62,7 +62,7 @@ classify.variable.type <- function(data, continuous.threshold = 4) { continuous.vars.index <- sapply( X = data, FUN = function(x) { - 'numeric' == class(x) & continuous.threshold < length(unique(na.omit(x))); + ('numeric' == class(x) | 'integer' == class(x)) & continuous.threshold < length(unique(na.omit(x))); } ); binary.vars.index <- sapply( From ea8d8d45032e2893136f0cd47c45db150edb6973 Mon Sep 17 00:00:00 2001 From: Nicole Zeltser Date: Tue, 18 Feb 2025 13:22:25 -0800 Subject: [PATCH 07/10] update dosage function manual --- man/convert.alleles.to.pgs.dosage.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/convert.alleles.to.pgs.dosage.Rd b/man/convert.alleles.to.pgs.dosage.Rd index 05f3afe..c1ea771 100644 --- a/man/convert.alleles.to.pgs.dosage.Rd +++ b/man/convert.alleles.to.pgs.dosage.Rd @@ -12,7 +12,7 @@ convert.alleles.to.pgs.dosage(called.alleles, risk.alleles) \item{risk.alleles}{A vector of risk alleles from a polygenic score corresponding to each genotype (by locus) in called.alleles.} } \value{ -A vector of dosages corresponding to each genotype in called.alleles. +A vector of dosages corresponding to each genotype in called.alleles. Hemizygous genotypes (one allele e.g. 'A' are counted as 1). } \description{ Convert genotype calls in the form of witten out alleles (e.g. 'A/T') to dosages (0, 1, 2) based on provided risk alleles from a PGS. From 92052b5e2cb591b6c984fa4014eb18f9c0358ca5 Mon Sep 17 00:00:00 2001 From: Nicole Zeltser Date: Tue, 18 Feb 2025 13:22:52 -0800 Subject: [PATCH 08/10] increment version in Description --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 757305e..387d3af 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ApplyPolygenicScore Type: Package Title: Utilities for the Application of a Polygenic Score to a VCF -Version: 3.0.0 +Version: 3.0.1 Authors@R: c( person('Paul', 'Boutros', role = 'cre', email = 'PBoutros@mednet.ucla.edu'), person('Nicole', 'Zeltser', role = 'aut', comment = c(ORCID = '0000-0001-7246-2771')), From 83a36738ffa8317216beb2d0e28caff7e8a07195 Mon Sep 17 00:00:00 2001 From: Nicole Zeltser Date: Tue, 18 Feb 2025 13:26:49 -0800 Subject: [PATCH 09/10] updated NEWS with changes and version --- NEWS.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/NEWS.md b/NEWS.md index 71958d7..0a39a99 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,14 @@ # Unreleased +# ApplyPolygenicScore 3.0.1 + +## Added +* Added hemizygous allele handling to dosage calculation + +## Changed +* Updated INDEL effect switch reporting by strand flip checker +* Updated data structuring for automated statistical analysis in `apply.polygenic.score` + # ApplyPolygenicScore 3.0.0 (2024-12-02) ## Added From dd9f6ec83ab3775e63e16a228a7c5db44f5b28a7 Mon Sep 17 00:00:00 2001 From: Nicole Zeltser Date: Tue, 18 Feb 2025 13:43:26 -0800 Subject: [PATCH 10/10] typo fix in manusl --- R/calculate-dosage.R | 2 +- man/convert.alleles.to.pgs.dosage.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/calculate-dosage.R b/R/calculate-dosage.R index 9e2bd8d..cb1f7f6 100644 --- a/R/calculate-dosage.R +++ b/R/calculate-dosage.R @@ -2,7 +2,7 @@ #' @description Convert genotype calls in the form of witten out alleles (e.g. 'A/T') to dosages (0, 1, 2) based on provided risk alleles from a PGS. #' @param called.alleles A vector of genotypes in allelic notation separated by a slash or pipe. #' @param risk.alleles A vector of risk alleles from a polygenic score corresponding to each genotype (by locus) in called.alleles. -#' @return A vector of dosages corresponding to each genotype in called.alleles. Hemizygous genotypes (one allele e.g. 'A' are counted as 1). +#' @return A vector of dosages corresponding to each genotype in called.alleles. Hemizygous genotypes (one allele e.g. 'A') are counted as 1. #' @examples #' called.alleles <- c('A/A', 'A/T', 'T/T'); #' risk.alleles <- c('T', 'T', 'T'); diff --git a/man/convert.alleles.to.pgs.dosage.Rd b/man/convert.alleles.to.pgs.dosage.Rd index c1ea771..9073cd8 100644 --- a/man/convert.alleles.to.pgs.dosage.Rd +++ b/man/convert.alleles.to.pgs.dosage.Rd @@ -12,7 +12,7 @@ convert.alleles.to.pgs.dosage(called.alleles, risk.alleles) \item{risk.alleles}{A vector of risk alleles from a polygenic score corresponding to each genotype (by locus) in called.alleles.} } \value{ -A vector of dosages corresponding to each genotype in called.alleles. Hemizygous genotypes (one allele e.g. 'A' are counted as 1). +A vector of dosages corresponding to each genotype in called.alleles. Hemizygous genotypes (one allele e.g. 'A') are counted as 1. } \description{ Convert genotype calls in the form of witten out alleles (e.g. 'A/T') to dosages (0, 1, 2) based on provided risk alleles from a PGS.