Skip to content

Commit

Permalink
handle log10 p
Browse files Browse the repository at this point in the history
  • Loading branch information
Al-Murphy committed Jul 17, 2024
1 parent 08bb31d commit c79a47a
Show file tree
Hide file tree
Showing 9 changed files with 167 additions and 41 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: MungeSumstats
Type: Package
Title: Standardise summary statistics from GWAS
Version: 1.13.1
Version: 1.13.2
Authors@R:
c(person(given = "Alan",
family = "Murphy",
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## CHANGES IN VERSION 1.13.2

### New features
* Handling of -log10 p-values (outside of VCFs) added.

## CHANGES IN VERSION 1.13.1

### New features
Expand Down
4 changes: 4 additions & 0 deletions R/format_sumstats.R
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,10 @@ format_sumstats <- function(path,
"Corrected"=rep("ES",14))
mapping_file <- rbind(mapping_file,es_cols)
}

#### Check 40:Check for log10 p instead of p ####
sumstats_return <-
read_log_pval(sumstats_dt = sumstats_return$sumstats_dt)

#### Check 2:Check for effect direction ####
sumstats_return <-
Expand Down
72 changes: 72 additions & 0 deletions R/read_log_pval.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#' Read -log10 p-value column
#'
#' Parse p-value column in VCF file.of other general -loq10 p-values
#'
#' @param sumstats_dt Summary stats data.table.
#' @param return_list Binary, whether to return the dt in a list or not - list
#' is standard for the `format_sumstats()` function.
#' @inheritParams format_sumstats
#'
#' @returns Null output.
#' @keywords internal
#' @importFrom data.table setnames
read_log_pval <- function(sumstats_dt,mapping_file = sumstatsColHeaders,
return_list=TRUE){
P <- NULL
# Need to convert P-value, currently -log10
#cgreate a check based on mapping file
column_headers <- names(sumstats_dt)
column_headers_upper <- toupper(names(sumstats_dt))
#get all p mappings
mapping_file_p <- mapping_file[mapping_file$Corrected=="P",]
#now create all possible -log10 p-value
log_p_col <- c(paste0("-LOG10_",mapping_file_p$Uncorrected),
paste0("LOG10_",mapping_file_p$Uncorrected),
paste0("-LOG10 ",mapping_file_p$Uncorrected),
paste0("LOG10 ",mapping_file_p$Uncorrected),
paste0("-LOG10-",mapping_file_p$Uncorrected),
paste0("LOG10-",mapping_file_p$Uncorrected),
paste0("-LOG10.",mapping_file_p$Uncorrected),
paste0("LOG10.",mapping_file_p$Uncorrected),
paste0("-LOG10",mapping_file_p$Uncorrected),
paste0("LOG10",mapping_file_p$Uncorrected),
paste0("LOG_",mapping_file_p$Uncorrected),
paste0("-L_",mapping_file_p$Uncorrected),
paste0("L_",mapping_file_p$Uncorrected),
paste0("LOG-",mapping_file_p$Uncorrected),
paste0("-L-",mapping_file_p$Uncorrected),
paste0("L-",mapping_file_p$Uncorrected),
paste0("LOG.",mapping_file_p$Uncorrected),
paste0("-L.",mapping_file_p$Uncorrected),
paste0("L.",mapping_file_p$Uncorrected),
paste0("-L",mapping_file_p$Uncorrected),
paste0("L",mapping_file_p$Uncorrected),
"LP")
lp_found <- FALSE
for(col_i in column_headers_upper){
if(isTRUE(any(col_i==log_p_col))){
lp_found<-TRUE
lp_col_up<-col_i
}
}
if (isTRUE(lp_found)) {
messager("sumstats has -log10 P-values; these will be",
"converted to unadjusted p-values in the 'P' column.")
lp_col <- column_headers[[which(column_headers_upper %in% lp_col_up)]]
#check if -log10 or log10
if(max(sumstats_dt[,get(lp_col)])<=0){
#log10 - less likely so more lenient check
sumstats_dt[, P := 10^(as.numeric(get(lp_col)))]
}else{
#-log10 - more likely
sumstats_dt[, P := 10^(-1 * as.numeric(get(lp_col)))]
}

}
#### Return format ####
if(return_list){
return(list("sumstats_dt" = sumstats_dt))
}else {
return(sumstats_dt)
}
}
3 changes: 2 additions & 1 deletion R/read_vcf.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ read_vcf <- function(path,
#### Prepare SNP column ####
read_vcf_markername(sumstats_dt = vcf_dt)
#### Prepare/un-log P col ####
read_vcf_pval(sumstats_dt = vcf_dt)
sumstats_dt <- read_log_pval(sumstats_dt = vcf_dt,
return_list=FALSE)
#### Prepare INFO col ####
read_vcf_info(sumstats_dt = vcf_dt)
#### Rename start col #####
Expand Down
21 changes: 0 additions & 21 deletions R/read_vcf_pval.R

This file was deleted.

32 changes: 32 additions & 0 deletions man/read_log_pval.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 0 additions & 18 deletions man/read_vcf_pval.Rd

This file was deleted.

51 changes: 51 additions & 0 deletions tests/testthat/test-log10_p.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
test_that("log10 p-value", {
## Call uses reference genome as default with more than 2GB of memory,
## which is more than what 32-bit Windows can handle so remove tests
is_32bit_windows <-
.Platform$OS.type == "windows" && .Platform$r_arch == "i386"
if (!is_32bit_windows) {
# read it in and make N
sumstats_dt <- data.table::fread(system.file("extdata", "eduAttainOkbay.txt",
package = "MungeSumstats"),
nThread = 1)
#test -log10
sumstats_dt[,LP:=-1*log(Pval,base=10)]
data.table::setnames(sumstats_dt,"Pval","Pval_org")
reformatted <- MungeSumstats::format_sumstats(sumstats_dt,
ref_genome = "GRCh37",
INFO_filter = 0.9,
on_ref_genome = FALSE,
strand_ambig_filter = FALSE,
bi_allelic_filter = FALSE,
allele_flip_check = FALSE,
log_folder_ind = FALSE,
imputation_ind = FALSE,
dbSNP=144,
ignore_multi_trait=TRUE,
return_data = TRUE)
testthat::expect_equal(reformatted$PVAL_ORG,reformatted$P)
#test log10
sumstats_dt <- data.table::fread(system.file("extdata", "eduAttainOkbay.txt",
package = "MungeSumstats"),
nThread = 1)
sumstats_dt[,LP:=log(Pval,base=10)]
data.table::setnames(sumstats_dt,"Pval","Pval_org")
reformatted <- MungeSumstats::format_sumstats(sumstats_dt,
ref_genome = "GRCh37",
INFO_filter = 0.9,
on_ref_genome = FALSE,
strand_ambig_filter = FALSE,
bi_allelic_filter = FALSE,
allele_flip_check = FALSE,
log_folder_ind = FALSE,
imputation_ind = FALSE,
dbSNP=144,
ignore_multi_trait=TRUE,
return_data = TRUE)
testthat::expect_equal(reformatted$PVAL_ORG,reformatted$P)
}
else{
testthat::expect_equal(is_32bit_windows, TRUE)
testthat::expect_equal(is_32bit_windows, TRUE)
}
})

0 comments on commit c79a47a

Please sign in to comment.