Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
stephenturner committed Sep 25, 2014
2 parents ccb08c8 + 29d82b2 commit 49c68e9
Show file tree
Hide file tree
Showing 13 changed files with 490 additions and 226 deletions.
6 changes: 6 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
^.*\.Rproj$
^\.Rproj\.user$
LICENSE
NEWS.md
misc/
assets/
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
._*
.Trashes

.Rbuildignore
.Rhistory
.Rproj.user
*.Rproj
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# qqman 0.1.2

* Does not assume that SNPs are evenly distributed across chromosomes when deciding where to place the tick in the center of the chromosome.
* Changed single chromosome x-axis notation to use Mb instead of raw pos
* `qq()` accepts graphical parameters the same way as `manhattan()`
* Removed default `xlim`
* Citation details on package load
* Added axis label options
* Removed `ymax` argument in favor of allowing user to set `ylim` in `...`
* Option to *not* take log of p-value

# qqman 0.1.1

* Fixed a bunch of typos in the vignette
84 changes: 51 additions & 33 deletions R/manhattan.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@
#' @param snp A string denoting the column name for the SNP name (rs number).
#' Defaults to PLINK's "SNP." Said column should be a character.
#' @param col A character vector indicating which colors to alternate.
#' @param ymax The upper limit to the y-axis. Set automatically based on the
#' most significant SNP unless set here specifically.
#' @param chrlabs A character vector equal to the number of chromosomes
#' specifying the chromosome labels (e.g., \code{c(1:22, "X", "Y", "MT")}).
#' @param suggestiveline Where to draw a "suggestive" line. Default
#' -log10(1e-5). Set to FALSE to disable.
#' @param genomewideline Where to draw a "genome-wide sigificant" line. Default
#' -log10(5e-8). Set to FALSE to disable.
#' @param highlight A character vector of SNPs in your dataset to highlight.
#' These SNPs should all be in your dataset.
#' @param logp If TRUE, the -log10 of the p-value is plotted. It isn't very
#' @param logp If TRUE, the -log10 of the p-value is plotted. It isn't very
#' useful to plot raw p-values, but plotting the raw value could be useful for
#' other genome-wide plots, for example, peak heights, bayes factors, test
#' other genome-wide plots, for example, peak heights, bayes factors, test
#' statistics, other "scores," etc.
#' @param ... Arguments passed on to other plot/points functions
#'
Expand All @@ -38,12 +38,12 @@
#' @export

manhattan <- function(x, chr="CHR", bp="BP", p="P", snp="SNP",
col=c("gray10", "gray60"), ymax=NULL,
col=c("gray10", "gray60"), chrlabs=NULL,
suggestiveline=-log10(1e-5), genomewideline=-log10(5e-8),
highlight=NULL, logp=TRUE, ...) {

# Not sure why, but package check will warn without this.
P=index=NULL
CHR=BP=P=index=NULL

# Check for sensible dataset
## Make sure you have chr, bp and p columns.
Expand All @@ -64,8 +64,10 @@ manhattan <- function(x, chr="CHR", bp="BP", p="P", snp="SNP",
if (!is.null(x[[snp]])) d=transform(d, SNP=x[[snp]])

# Set positions, ticks, and labels for plotting
## Sort, keep only SNPs with p-values between 0 and 1
d=subset(d[order(d$CHR, d$BP), ], (P>0 & P<=1 & is.numeric(P)))
## Sort and keep only values where is numeric.
#d <- subset(d[order(d$CHR, d$BP), ], (P>0 & P<=1 & is.numeric(P)))
d <- subset(d, (is.numeric(CHR) & is.numeric(BP) & is.numeric(P)))
d <- d[order(d$CHR, d$BP), ]
#d$logp <- ifelse(logp, yes=-log10(d$P), no=d$P)
if (logp) {
d$logp <- -log10(d$P)
Expand All @@ -74,22 +76,6 @@ manhattan <- function(x, chr="CHR", bp="BP", p="P", snp="SNP",
}
d$pos=NA

# Set y maximum. If ymax is undefined, not numeric, or negative, set it
# equal to the most significant SNP.
if (is.null(ymax)) { # still null
ymax = ceiling(max(d$logp))
message("Ymax will be set automatically based on most significant SNP")
} else if (!is.numeric(ymax)){ # not numeric
ymax = ceiling(max(d$logp))
warning('non-numeric ymax argument.')
} else if (ymax < 0) { # negative
ymax = ceiling(max(d$logp))
warning('negative ymax argument.')
} else if (is.null(ymax)) { # still null
ymax = ceiling(max(d$logp))
message(paste("Using", ymax, "as ymax"))
}
message(paste("Using", ymax, "as ymax"))

# Fixes the bug where one chromosome is missing by adding a sequential index column.
d$index=NA
Expand All @@ -110,9 +96,10 @@ manhattan <- function(x, chr="CHR", bp="BP", p="P", snp="SNP",
# 3 1 5
nchr = length(unique(d$CHR))
if (nchr==1) { ## For a single chromosome
d$pos=d$BP
options(scipen=999)
d$pos=d$BP/1e6
ticks=floor(length(d$pos))/2+1
xlabel = paste('Chromosome',unique(d$CHR),'position')
xlabel = paste('Chromosome',unique(d$CHR),'position(Mb)')
labs = ticks
} else { ## For multiple chromosomes
lastbase=0
Expand All @@ -124,19 +111,50 @@ manhattan <- function(x, chr="CHR", bp="BP", p="P", snp="SNP",
lastbase=lastbase+tail(subset(d,index==i-1)$BP, 1)
d[d$index==i, ]$pos=d[d$index==i, ]$BP+lastbase
}
ticks=c(ticks, d[d$index==i, ]$pos[floor(length(d[d$index==i, ]$pos)/2)+1])
# Old way: assumes SNPs evenly distributed
# ticks=c(ticks, d[d$index==i, ]$pos[floor(length(d[d$index==i, ]$pos)/2)+1])
# New way: doesn't make that assumption
ticks = c(ticks, (min(d[d$CHR == i,]$pos) + max(d[d$CHR == i,]$pos))/2 + 1)
}
xlabel = 'Chromosome'
#labs = append(unique(d$CHR),'') ## I forgot what this was here for... if seems to work, remove.
labs=unique(d$CHR)
labs <- unique(d$CHR)
}

# Initialize plot
xmax = ceiling(max(d$pos) * 1.03)
xmin = floor(max(d$pos) * -0.03)
ymin = 0
plot(NULL, xaxt='n', bty='n', xaxs='i', yaxs='i', xlim=c(xmin,xmax), ylim=c(ymin,ymax),
xlab=xlabel, ylab=expression(-log[10](italic(p))), las=1, pch=20, ...)

# The old way to initialize the plot
# plot(NULL, xaxt='n', bty='n', xaxs='i', yaxs='i', xlim=c(xmin,xmax), ylim=c(ymin,ymax),
# xlab=xlabel, ylab=expression(-log[10](italic(p))), las=1, pch=20, ...)


# The new way to initialize the plot.
## See http://stackoverflow.com/q/23922130/654296
## First, define your default arguments
def_args <- list(xaxt='n', bty='n', xaxs='i', yaxs='i', las=1, pch=20,
xlim=c(xmin,xmax), ylim=c(0,ceiling(max(d$logp))),
xlab=xlabel, ylab=expression(-log[10](italic(p))))
## Next, get a list of ... arguments
#dotargs <- as.list(match.call())[-1L]
dotargs <- list(...)
## And call the plot function passing NA, your ... arguments, and the default
## arguments that were not defined in the ... arguments.
do.call("plot", c(NA, dotargs, def_args[!names(def_args) %in% names(dotargs)]))

# If manually specifying chromosome labels, ensure a character vector and number of labels matches number chrs.
if (!is.null(chrlabs)) {
if (is.character(chrlabs)) {
if (length(chrlabs)==length(labs)) {
labs <- chrlabs
} else {
warning("You're trying to specify chromosome labels but the number of labels != number of chromosomes.")
}
} else {
warning("If you're trying to specify chromosome labels, chrlabs must be a character vector")
}
}

# Add an axis.
if (nchr==1) { #If single chromosome, ticks and labels automatic.
Expand All @@ -150,7 +168,7 @@ manhattan <- function(x, chr="CHR", bp="BP", p="P", snp="SNP",

# Add points to the plot
if (nchr==1) {
with(d, points(pos, logp, pch=20, ...))
with(d, points(pos, logp, pch=20, col=col[1], ...))
} else {
# if multiple chromosomes, need to alternate colors and increase the color index (icol) each chr.
icol=1
Expand All @@ -171,4 +189,4 @@ manhattan <- function(x, chr="CHR", bp="BP", p="P", snp="SNP",
with(d.highlight, points(pos, logp, col="green3", pch=20, ...))
}

}
}
27 changes: 21 additions & 6 deletions R/qq.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,28 @@ qq = function(pvector, ...) {
o = -log10(sort(pvector,decreasing=FALSE))
e = -log10( ppoints(length(pvector) ))

# Create the plot
plot(e, o, pch=20,
xlab=expression(Expected~~-log[10](italic(p))),
ylab=expression(Observed~~-log[10](italic(p))),
xlim=c(0,max(e)), ...)

# # The old way
# plot(e, o, pch=20,
# xlab=expression(Expected~~-log[10](italic(p))),
# ylab=expression(Observed~~-log[10](italic(p))),
# ...)

# The new way to initialize the plot.
## See http://stackoverflow.com/q/23922130/654296
## First, define your default arguments
def_args <- list(pch=20, xlim=c(0, max(e)), ylim=c(0, max(o)),
xlab=expression(Expected~~-log[10](italic(p))),
ylab=expression(Observed~~-log[10](italic(p)))
)
## Next, get a list of ... arguments
#dotargs <- as.list(match.call())[-1L]
dotargs <- list(...)
## And call the plot function passing NA, your ... arguments, and the default
## arguments that were not defined in the ... arguments.
tryCatch(do.call("plot", c(list(x=e, y=o), def_args[!names(def_args) %in% names(dotargs)], dotargs)), warn=stop)

# Add diagonal
abline(0,1,col="red")

}
}
8 changes: 8 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.onAttach <- function(...){
packageStartupMessage()
packageStartupMessage("For example usage please run: vignette('qqman')")
packageStartupMessage()
packageStartupMessage("Citation appreciated but not required:")
packageStartupMessage("Turner, S.D. qqman: an R package for visualizing GWAS results using Q-Q and manhattan plots. biorXiv DOI: 10.1101/005165 (2014).")
packageStartupMessage()
}
21 changes: 14 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# qqman: An R package for creating Q-Q and manhattan plots from GWAS results.

![qqman.gif](assets/qqman.gif)

## Citation

If you'd like to cite qqman (appreciated but not required), please cite the pre-print below:
Expand All @@ -11,21 +13,26 @@ Turner, S.D. qqman: an R package for visualizing GWAS results using Q-Q and manh
Install the stable release from CRAN:

```coffee
# Install once
install.packages("qqman")

# Load each time you use it
library(qqman)
```

Install the most recent development release with devtools (note, there be dragons here):
Or install directly from github using devtools

```coffee
# Install once with devtools
library(devtools)
install_github("stephenturner/qqman")
```

Or install the most recent development release with devtools (note, there be dragons here):

# Load
```coffee
library(devtools)
install_github("stephenturner/qqman", ref="dev")
```

Load the package each time you use it:

```coffee
library(qqman)
```

Expand Down
Binary file added assets/qqman.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
34 changes: 20 additions & 14 deletions inst/doc/qqman.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## ----, include=FALSE-----------------------------------------------------
library(qqman)
library(knitr)
opts_chunk$set(comment=NA, fig.width=12, fig.height=9, message=FALSE, tidy=TRUE)
opts_chunk$set(comment=NA, fig.width=12, fig.height=9, message=FALSE, tidy=TRUE, dpi=75)

## ----generatedata, eval=FALSE, echo=FALSE--------------------------------
# # This code used to generate the test data. Runs slow, but does the job.
Expand Down Expand Up @@ -47,24 +47,30 @@ as.data.frame(table(gwasResults$CHR))
manhattan(gwasResults)

## ------------------------------------------------------------------------
manhattan(gwasResults, main="Manhattan Plot", cex=0.5, cex.axis=0.8)
manhattan(gwasResults, main="Manhattan Plot", ylim=c(0,10), cex=0.6, cex.axis=0.9, col=c("blue4", "orange3"), suggestiveline=F, genomewideline=F, chrlabs=c(1:20, "P", "Q"))

## --------------------------------------------------------------------------------------------------------
manhattan(gwasResults, col=c("blue4", "orange3"), ymax=12)

## --------------------------------------------------------------------------------------------------------
manhattan(gwasResults, suggestiveline=F, genomewideline=F)

## --------------------------------------------------------------------------------------------------------
## ------------------------------------------------------------------------
manhattan(subset(gwasResults, CHR==1))

## --------------------------------------------------------------------------------------------------------
## ------------------------------------------------------------------------
str(snpsOfInterest)
manhattan(gwasResults, highlight=snpsOfInterest)

## --------------------------------------------------------------------------------------------------------
manhattan(subset(gwasResults, CHR==3), highlight=snpsOfInterest, main="Chr 3")
## ------------------------------------------------------------------------
manhattan(subset(gwasResults, CHR==3), highlight=snpsOfInterest, xlim=c(200, 500), main="Chr 3")

## ------------------------------------------------------------------------
# Add test statistics
gwasResults <- transform(gwasResults, zscore=qnorm(P/2, lower.tail=FALSE))
head(gwasResults)

# Make the new plot
manhattan(gwasResults, p="zscore", logp=FALSE, ylab="Z-score", genomewideline=FALSE, suggestiveline=FALSE, main="Manhattan plot of Z-scores")

## --------------------------------------------------------------------------------------------------------
qq(gwasResults$P, main="Q-Q plot of GWAS p-values")
## ------------------------------------------------------------------------
qq(gwasResults$P)

## ------------------------------------------------------------------------
qq(gwasResults$P, main="Q-Q plot of GWAS p-values",
xlim=c(0,7), ylim=c(0,12), pch=18, col="blue4", cex=1.5, las=1)

Loading

0 comments on commit 49c68e9

Please sign in to comment.