diff --git a/R/results.R b/R/results.R index 4256739..8fc2180 100644 --- a/R/results.R +++ b/R/results.R @@ -41,8 +41,8 @@ get_dtu_ids <- function(dtuo) { setorder(myt, -adp, na.last=TRUE) # Sort genes. }) - pid <- unique(myt[, "parent_id", with=FALSE]) - po <- match(dtuo$Genes[, "parent_id", with=FALSE], pid) + pid <- unique(myt$parent_id) + po <- match(dtuo$Genes$parent_id, pid) myp <- copy(dtuo$Genes[order(po), ]) with(myp, { @@ -74,11 +74,8 @@ get_dtu_ids <- function(dtuo) { #' @param dtuo A DTU object. #' @param pid A \code{parent_id} to make the plot for. #' @param style Different themes: \itemize{ -#' \item{"plain" - Grouped by condition.}, -#' \item{"paired" - Grouped by isoform.}, -#' \item{"points" - Grouped by condition. Show individual measurements as points.}, -#' \item{"pairedpnt" - Grouped by isoform. Show individual measurements as points.}, -#' \item{"lines" - (Default) Grouped by condition. Connect individual measurements with colour-coded lines.} +#' \item{"byisoform" - Grouped by isoform. Show individual measurements as points.}, +#' \item{"bycondition" - (Default) Grouped by condition. Connect individual measurements with colour-coded lines.} #' \item{"linesonly" - Grouped by condition. Connect replicate measurements as colour-coded lines. Hide the boxplots.} #' } #' @param fillby Applies to the boxplots. Not all options will work with all styles. @@ -160,72 +157,14 @@ plot_gene <- function(dtuo, pid, style="lines", fillby=NA_character_, colourby=N # Plot. result <- NULL - ### - if (style=="plain") { - if (is.na(fillby)) - fillby <- "condition" - if (is.na(colourby)) - colourby <- "DTU" - if (colourby=="replicate") - stop("This style cannot be coloured by replicate!") - shapeby="none" - result <- ggplot(vis_data, aes(x= isoform, y= vals, fill= vis_data[[fillby]], colour=vis_data[[colourby]])) + - facet_grid(type ~ condition, scales= "free") + - geom_boxplot(alpha=0.5, outlier.shape= NA) + - scale_fill_manual(values= colplt[[fillby]], name=fillby) + - scale_colour_manual(values= colplt[[colourby]], name=colourby) - ### - } else if ( style == "paired" ) { - if(is.na(fillby)) { - if (is.na(colourby) || colourby != "condition") { - fillby <- "condition" - } else { - fillby <- "DTU" - } - } else if (fillby != "condition"){ - if (!is.na(colourby) && colourby != "condition") { - stop("This style requires either fillby or colourby to be set to \"condition\".") - } else { - colourby <- "condition" - } - } - if(is.na(colourby)) - colourby <- "DTU" - if (colourby=="replicate") - stop("This style cannot be coloured by replicate!") - shapeby="none" - result <- ggplot(vis_data, aes(x= isoform, y= vals, fill= vis_data[[fillby]], colour=vis_data[[colourby]])) + - facet_grid(type ~ ., scales= "free") + - geom_boxplot(alpha=0.5, outlier.shape= NA, width=0.5) + - scale_fill_manual(values= colplt[[fillby]], name=fillby) + - scale_colour_manual(values= colplt[[colourby]], name=colourby) - ### - } else if (style=="points") { - if (is.na(fillby)) - fillby <- "condition" - if (is.na(colourby)) - colourby <- "DTU" - if (is.na(shapeby)) { - if (all("DTU" != c(fillby, colourby))) { - shapeby <- "DTU" - } else { - shapeby <- "none" - } - } - result <- ggplot(vis_data, aes(x= isoform, y= vals)) + - facet_grid(type ~ condition, scales= "free") + - geom_point(aes(colour= vis_data[[colourby]], shape=vis_data[[shapeby]]), position= position_jitterdodge(), stroke= rel(0.8)) + - geom_boxplot(aes(fill= vis_data[[fillby]]), alpha=0.2, outlier.shape= NA) + - scale_shape_manual(values=shaplt[[shapeby]], name=shapeby) + - scale_fill_manual(values= colplt[[fillby]], name=fillby) + - scale_colour_manual(values= colplt[[colourby]], name=colourby) - ### - } else if ( style == "pairedpnt" ) { + + ### BY ISOFORM. + if (any(style==c("byisoform", "merged"))) { if(is.na(fillby)) { - if (is.na(colourby) || colourby != "condition") { + if (is.na(colourby) || colourby != "condition" ) { fillby <- "condition" - } else { - fillby <- "DTU" + } else { + fillby <- "isoform" } } else if (fillby != "condition"){ if (!is.na(colourby) && colourby != "condition") { @@ -234,47 +173,47 @@ plot_gene <- function(dtuo, pid, style="lines", fillby=NA_character_, colourby=N colourby <- "condition" } } - if(is.na(colourby)) - colourby <- "DTU" - if (colourby=="replicate") - stop("This style cannot be coloured by replicate!") + if(is.na(colourby)) { + colourby <- "isoform" + } else if (colourby=="replicate") { + colourby="none" + } if (is.na(shapeby)) { if (all("DTU" != c(fillby, colourby))) { shapeby <- "DTU" + } else if (all("isoform" != c(fillby, colourby))) { + shapeby <- "isoform" } else { shapeby <- "none" } } - result <- ggplot(vis_data, aes(x= isoform, y= vals)) + + result <- ggplot(vis_data, aes(x= isoform, y= vals, colour= vis_data[[colourby]], fill= vis_data[[fillby]])) + facet_grid(type ~ ., scales= "free") + - geom_jitter(aes(colour= vis_data[[colourby]], shape=vis_data[[shapeby]]), position=position_dodge(width=0.5), stroke= rel(0.8)) + - geom_boxplot(aes(fill= vis_data[[fillby]]), alpha=0.2, outlier.shape= NA) + + geom_jitter(aes(shape=vis_data[[shapeby]]), position=position_jitterdodge(), stroke= rel(0.8)) + + geom_boxplot(position=position_dodge(), alpha=0.3, outlier.shape= NA) + scale_shape_manual(values= shaplt[[shapeby]], name=shapeby) + scale_fill_manual(values= colplt[[fillby]], name=fillby) + scale_colour_manual(values= colplt[[colourby]], name=colourby) - ### - } else if (style=="lines") { + ### BY CONDITION. + } else if (any(style==c("bycondition", "lines"))) { if (is.na(fillby)) fillby <- "DTU" - if (is.na(colourby)) - colourby <- "replicate" + colourby <- "replicate" shapeby="none" result <- ggplot(vis_data, aes(x= isoform, y= vals, fill= vis_data[[fillby]])) + facet_grid(type ~ condition, scales= "free") + geom_path(aes(colour= replicate, group= replicate)) + - geom_boxplot(alpha=0.2, outlier.shape= NA) + + geom_boxplot(alpha=0.3, outlier.shape= NA) + scale_fill_manual(values= colplt[[fillby]], name=fillby) - ### + ### BY CONDITION LINESONLY. } else if (style=="linesonly") { - if (is.na(fillby)) - fillby <- "none" - if (is.na(colourby)) - colourby <- "replicate" + fillby <- "none" + colourby <- "replicate" shapeby="none" result <- ggplot(vis_data, aes(x= isoform, y= vals, colour= replicate)) + facet_grid(type ~ condition, scales= "free") + geom_path(aes(group= replicate)) - ### + ### ERROR } else { stop("Unknown plot style.") } @@ -293,7 +232,6 @@ plot_gene <- function(dtuo, pid, style="lines", fillby=NA_character_, colourby=N result <- result + guides(colour="none") if (shapeby == "none") result <- result + guides(shape="none") - return(result) }) } @@ -305,14 +243,8 @@ plot_gene <- function(dtuo, pid, style="lines", fillby=NA_character_, colourby=N #' @param dtuo A DTU object. #' @param type Type of plot. \itemize{ #' \item{"volcano"}{Change in proportion VS. statistical significance. Done at the transcript level. (Default)} -#' \item{"maxdprop"}{Distribution of biggest change in proportion in each gene.} -#' \item{"transc_quant"}{Transcript-level quantification reproducibility threshold VS. number of DTU positive calls.} -#' \item{"gene_quant"}{Gene-level quantification reproducibility threshold VS. number of DTU positive calls.} -#' \item{"transc_rep"}{Transcript-level replication reproducibility threshold VS. number of DTU positive calls.} -#' \item{"gene_rep"}{Gene-level replication reproducibility threshold VS. number of DTU positive calls.} - -#' } -#' @return a ggplot2 object. Simply display it or you can also customize it. +#' \item{"maxdprop"}{Distribution of biggest change in proportion in each gene.} } +#' @return A ggplot2 object. Simply display it or you can also customize it. #' #' Generally uses the results of the transcript-level proportion tests. #' @@ -321,18 +253,20 @@ plot_gene <- function(dtuo, pid, style="lines", fillby=NA_character_, colourby=N #' @export plot_overview <- function(dtuo, type="volcano") { with(dtuo, { + ### VOLCANO if (any(type == c("gene_volcano", "volcano"))) { mydata = Transcripts[, .(target_id, Dprop, -log10(pval_corr), DTU)] names(mydata)[3] <- "neglogP" - result <- ggplot(data = mydata, aes(Dprop, neglogP, colour = DTU)) + - geom_point(alpha = 0.3) + - ggtitle("Proportion change VS significance") + - labs(x = paste("Prop in ", Parameters$cond_B, " (-) Prop in ", Parameters$cond_A, sep=""), - y ="-log10 (Pval)") + - scale_color_manual(values=c("steelblue3", "red")) + - scale_x_continuous(breaks = seq(-1, 1, 0.2)) + - theme(panel.background= element_rect(fill= "grey98"), - panel.grid.major= element_line(colour= "grey95") ) + result <- ggplot(data = na.omit(mydata), aes(Dprop, neglogP, colour = DTU)) + + geom_point(alpha = 0.3) + + ggtitle("Isoform proportion change VS significance") + + labs(x = paste("Prop in ", Parameters$cond_B, " (-) Prop in ", Parameters$cond_A, sep=""), + y ="-log10 (Pval)") + + scale_color_manual(values=c("steelblue3", "red")) + + scale_x_continuous(breaks = seq(-1, 1, 0.2)) + + theme(panel.background= element_rect(fill= "grey98"), + panel.grid.major= element_line(colour= "grey95") ) + ### MAXDPROP } else if (type == "maxdprop") { tmp <- copy(Transcripts) # I don't want the intermediate calculations to modify the dtu object. tmp[, abma := abs(Dprop)] @@ -342,53 +276,13 @@ plot_overview <- function(dtuo, type="volcano") { tmp[, dtu := Genes[match(tmp$Group.1, Genes[, parent_id]), Genes$DTU] ] # ok, plotting time result <- ggplot(data = na.omit(tmp), aes(x, fill=dtu)) + - geom_histogram(binwidth = 0.01, position="identity", alpha = 0.5) + - ggtitle("Distribution of largest proportion change per gene") + - labs(x = paste("abs( Prop in ", Parameters$cond_B, " (-) Prop in ", Parameters$cond_A, " )", sep=""), - y ="Number of genes") + - scale_fill_manual(values=c("steelblue3", "red")) + - scale_x_continuous(breaks = seq(0, 1, 0.1)) + - scale_y_continuous(trans="sqrt") - } else if (type == "transc_quant") { - mydata <- data.frame("thresh"=seq(0, 1, 0.01), - "count"= sapply(seq(0, 1, 0.01), function(x) { - sum(Transcripts[(quant_dtu_freq >= x), elig_fx & sig], na.rm=TRUE) })) - result <- ggplot(data = mydata, aes(thresh, count)) + - geom_freqpoly(stat= "identity", size= 1.5) + - ggtitle("Quantification reproducibility VS DTU transcripts") + - labs(x="Reproducibility threshold", - y="Number of transcripts") + - scale_x_continuous(breaks = seq(0, 1, 0.1)) - } else if (type == "gene_quant") { - mydata <- data.frame("thresh"=seq(0, 1, 0.01), - "count"= sapply(seq(0, 1, 0.01), function(x) { - sum(Genes[(quant_dtu_freq >= x), elig_fx & sig], na.rm=TRUE) })) - result <- ggplot(data = mydata, aes(thresh, count)) + - geom_freqpoly(stat= "identity", size= 1.5) + - ggtitle("Quantification reproducibility VS DTU genes") + - labs(x="Reproducibility threshold", - y="Number of genes") + - scale_x_continuous(breaks = seq(0, 1, 0.1)) - } else if (type == "transc_rep") { - mydata <- data.frame("thresh"=seq(0, 1, 0.01), - "count"= sapply(seq(0, 1, 0.01), function(x) { - sum(Transcripts[(rep_dtu_freq >= x), elig_fx & sig], na.rm=TRUE) })) - result <- ggplot(data = mydata, aes(thresh, count)) + - geom_freqpoly(stat= "identity", size= 1.5) + - ggtitle("Replication reproducibility VS DTU transcripts") + - labs(x="Reproducibility threshold", - y="Number of transcripts") + - scale_x_continuous(breaks = seq(0, 1, 0.1)) - } else if (type == "gene_rep") { - mydata <- data.frame("thresh"=seq(0, 1, 0.01), - "count"= sapply(seq(0, 1, 0.01), function(x) { - sum(Genes[(rep_dtu_freq >= x), elig_fx & sig], na.rm=TRUE) })) - result <- ggplot(data = mydata, aes(thresh, count)) + - geom_freqpoly(stat= "identity", size= 1.5) + - ggtitle("Replication reproducibility VS DTU genes") + - labs(x="Reproducibility threshold", - y="Number of genes") + - scale_x_continuous(breaks = seq(0, 1, 0.1)) + geom_histogram(binwidth = 0.01, position="identity", alpha = 0.5) + + ggtitle("Distribution of largest isoform proportion change per gene") + + labs(x = paste("abs( Prop in ", Parameters$cond_B, " (-) Prop in ", Parameters$cond_A, " )", sep=""), + y ="Number of genes") + + scale_fill_manual(values=c("steelblue3", "red")) + + scale_x_continuous(breaks = seq(0, 1, 0.1)) + + scale_y_continuous(trans="sqrt") } else { stop("Unrecognized plot type!") } diff --git a/inst/doc/input.Rmd b/inst/doc/input.Rmd index d42ceae..d6f2594 100644 --- a/inst/doc/input.Rmd +++ b/inst/doc/input.Rmd @@ -9,7 +9,7 @@ output: theme: readable toc: yes vignette: > - %\VignetteIndexEntry{RATs Input & Settings} + %\VignetteIndexEntry{RATs 2: Input & Settings} %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} diff --git a/inst/doc/intro.Rmd b/inst/doc/intro.Rmd index 5f2ade4..a9a42bc 100644 --- a/inst/doc/intro.Rmd +++ b/inst/doc/intro.Rmd @@ -9,7 +9,7 @@ output: theme: readable toc: yes vignette: > - %\VignetteIndexEntry{RATs Quick start} + %\VignetteIndexEntry{RATs 1: Quick start} %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} --- diff --git a/inst/doc/output.R b/inst/doc/output.R index d09cec7..43f27ee 100644 --- a/inst/doc/output.R +++ b/inst/doc/output.R @@ -26,7 +26,7 @@ print( dtu_summary(mydtu) ) ids <- get_dtu_ids(mydtu) # Contents -print( names(ids) ) +print( ids ) # DTU positive genes according to the transcript-level test. print( ids[[4]] ) @@ -51,3 +51,7 @@ print( names(mydtu$Transcripts) ) # Elements of ReplicateData print( names(mydtu$Abundances) ) +## ------------------------------------------------------------------------ +# Abundance table for first condition. +print( head(mydtu$Abundances[[1]]) ) + diff --git a/inst/doc/output.Rmd b/inst/doc/output.Rmd index f6d3e85..2f2da1a 100644 --- a/inst/doc/output.Rmd +++ b/inst/doc/output.Rmd @@ -9,7 +9,7 @@ output: theme: readable toc: yes vignette: > - %\VignetteIndexEntry{RATs Output and Plots} + %\VignetteIndexEntry{RATs 3: Raw Output} %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} @@ -75,7 +75,7 @@ As of `v0.4.2` it uses the same category names as `dtu_summury()` for consistenc ids <- get_dtu_ids(mydtu) # Contents -print( names(ids) ) +print( ids ) # DTU positive genes according to the transcript-level test. print( ids[[4]] ) @@ -244,6 +244,10 @@ print( names(mydtu$Abundances) ) 1. `condA` - (num) The transcript abundances in the first condition. 2. `condB` - (num) The transcript abundances in the second condition. +```{r} +# Abundance table for first condition. +print( head(mydtu$Abundances[[1]]) ) +``` *** diff --git a/inst/doc/output.html b/inst/doc/output.html index 136aead..a3f59da 100644 --- a/inst/doc/output.html +++ b/inst/doc/output.html @@ -267,22 +267,53 @@

Quick results

ids <- get_dtu_ids(mydtu) # Contents -print( names(ids) ) +print( ids ) -
##  [1] "DTU genes (gene test)"        "non-DTU genes (gene test)"   
-##  [3] "NA genes (gene test)"         "DTU genes (transc. test)"    
-##  [5] "non-DTU genes (transc. test)" "NA genes (transc. test)"     
-##  [7] "DTU genes (both tests)"       "non-DTU genes (both tests)"  
-##  [9] "NA genes (both tests)"        "DTU transcripts"             
-## [11] "non-DTU transcripts"          "NA transcripts"
+
## $`DTU genes (gene test)`
+## [1] "MIX6"
+## 
+## $`non-DTU genes (gene test)`
+## [1] "CC" "NN"
+## 
+## $`NA genes (gene test)`
+## [1] "LC"   "1A1N" "1B1C" "1D1C" "ALLA" "ALLB" "NIB" 
+## 
+## $`DTU genes (transc. test)`
+## [1] "MIX6"
+## 
+## $`non-DTU genes (transc. test)`
+## [1] "LC"   "CC"   "NN"   "1A1N" "1B1C" "1D1C" "ALLA" "ALLB" "NIB" 
+## 
+## $`NA genes (transc. test)`
+## character(0)
+## 
+## $`DTU genes (both tests)`
+## [1] "MIX6"
+## 
+## $`non-DTU genes (both tests)`
+## [1] "CC" "NN"
+## 
+## $`NA genes (both tests)`
+## character(0)
+## 
+## $`DTU transcripts`
+## [1] "MIX6.c1" "MIX6.c2"
+## 
+## $`non-DTU transcripts`
+## [1] "MIX6.c4" "LC2"     "CC_a"    "CC_b"    "MIX6.c3" "2NN"     "1NN"    
+## [8] "MIX6.nc"
+## 
+## $`NA transcripts`
+##  [1] "LC1"      "1A1N-2"   "1B1C.1"   "1B1C.2"   "1D1C:one" "1D1C:two"
+##  [7] "MIX6.d"   "ALLA1"    "ALLB1"    "ALLB2"    "NIB.1"
 
# DTU positive genes according to the transcript-level test.
 print( ids[[4]] )
 
-
## character(0)
+
## [1] "MIX6"
 

The ID lists obtained with get_dtu_ids() are ordered by effect size (Dprop).

@@ -484,6 +515,19 @@

Abundances

  • condB - (num) The transcript abundances in the second condition.
  • +
    # Abundance table for first condition.
    +print( head(mydtu$Abundances[[1]]) )
    +
    + +
    ##          V1   V2 target_id parent_id
    +## 1: 19.66667 20.5    1A1N-2      1A1N
    +## 2:  0.00000  0.0    1B1C.1      1B1C
    +## 3: 52.33333 53.5    1B1C.2      1B1C
    +## 4:  0.00000  0.0  1D1C:one      1D1C
    +## 5: 76.00000 78.0  1D1C:two      1D1C
    +## 6: 50.00000 45.0     ALLA1      ALLA
    +
    +

    Contact information

    diff --git a/inst/doc/plots.R b/inst/doc/plots.R index 3d23df3..aaf8484 100644 --- a/inst/doc/plots.R +++ b/inst/doc/plots.R @@ -18,43 +18,30 @@ mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", name_B = "p of RATs.") ## ------------------------------------------------------------------------ -# Split by condition for easier view of the overall gene profile. -plot_gene(mydtu, "MIX6", style="plain") +# Grouping by condition (DEAFULT): +# plot_gene(mydtu, "MIX6") +plot_gene(mydtu, "MIX6", style="bycondition") ## ------------------------------------------------------------------------ -# Isoforms paired up for easier individual comparisons. -plot_gene(mydtu, "MIX6", style="paired") - -## ------------------------------------------------------------------------ -# Split by condition. -plot_gene(mydtu, "MIX6", style="points") - -## ------------------------------------------------------------------------ -# Paired by isoform. -plot_gene(mydtu, "MIX6", style="pairedpnt") - -## ------------------------------------------------------------------------ -# Split by condition. -# This is the DEFAULT view if the style is omitted, as it is the most informative. -plot_gene(mydtu, "MIX6", style="lines") +# Grouping by condition (minimalist): +plot_gene(mydtu, "MIX6", style="linesonly") ## ------------------------------------------------------------------------ -# A cleaner version, although it no longer shows which isoforms are DTU. -plot_gene(mydtu, "MIX6", style="linesonly") +# Grouping by isoform: +plot_gene(mydtu, "MIX6", style="byisoform") ## ------------------------------------------------------------------------ -# You can change the information that is colour-coded. -plot_gene(mydtu, "MIX6", style="plain", fillby="DTU") -plot_gene(mydtu, "MIX6", style="points", fillby="isoform", colourby="replicate") -plot_gene(mydtu, "MIX6", style="pairedpnt", colourby="isoform", shapeby="replicate") +# Change the encoded information. +plot_gene(mydtu, "MIX6", style="bycondition", fillby="isoform") +plot_gene(mydtu, "MIX6", style="byisoform", colourby="DTU", shapeby="replicate") -# For a less colourful look, the layered information can be disabled. -plot_gene(mydtu, "MIX6", style="points", fillby="none", colourby="none", shapeby="none") +# For a less busy look, any of the information layers can be disabled. +plot_gene(mydtu, "MIX6", style="byisoform", colourby="none", shapeby="none") ## ------------------------------------------------------------------------ -# You can also customise the colours used by specifying new values for +# Colour codes can be customised by specifying new values for # condcolvec, replcolvec, isofcolvec, dtucolvec and nonecol. -plot_gene(mydtu, "MIX6", style="lines", fillby="condition", condcolvec=c("magenta", "cyan")) +plot_gene(mydtu, "MIX6", style="bycondition", fillby="condition", condcolvec=c("magenta", "cyan")) ## ------------------------------------------------------------------------ # Proportion change VS significance. diff --git a/inst/doc/plots.Rmd b/inst/doc/plots.Rmd index b3c9c28..e231c08 100644 --- a/inst/doc/plots.Rmd +++ b/inst/doc/plots.Rmd @@ -9,7 +9,7 @@ output: theme: readable toc: yes vignette: > - %\VignetteIndexEntry{RATs Output and Plots} + %\VignetteIndexEntry{RATs 4: Plots} %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} @@ -47,7 +47,8 @@ mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", name_B = "p # Visualisation of results -The output object's tables provide a host of information. But a good plot is worth a thousand numbers. +The RATs output object provides a host of information and we encourage users to familiarize themselves with it. +But a good plot is worth a thousand numbers. ## Isoform abundances for a given gene @@ -55,82 +56,72 @@ The output object's tables provide a host of information. But a good plot is wor This function allows you to visualise what's going on in any particular gene. Both the absolute counts and the relative proportions are plotted for each transcript. This is a very useful function for inspecting a gene of interest. It enables quick visual evaluation of the dispersion of the replicate measurements, the magnitude of the proportion change, the -presence of outliers, and the consistency among the replicates. There are several styles for this plot, depending on your preferences. +presence of outliers, and the consistency among the replicates. -The simplest option is to represent the measurements from your replicates as boxplots: +By default, these plots can be quite colourful (and possibly ugly) as their aim is to highlight which isoforms are predicted to be DTU. +Options are provided to change which information layers are encoded and what colours are used. -```{r} -# Split by condition for easier view of the overall gene profile. -plot_gene(mydtu, "MIX6", style="plain") -``` +There are several styles for this plot, depending on your preferences. The default plot format includes the most information: ```{r} -# Isoforms paired up for easier individual comparisons. -plot_gene(mydtu, "MIX6", style="paired") +# Grouping by condition (DEAFULT): +# plot_gene(mydtu, "MIX6") +plot_gene(mydtu, "MIX6", style="bycondition") ``` -Boxplots, however, can hide the real distribution of your data, which could be multi-modal. Adding the actual measurements -resolves that: +The top two facets show the absolute abundances (counts) of the isoforms, the bottom two show the relative abundances (proportions). +The left two facets refer to one condition, the right two ones to the other condition. The boxplots describe the abundance measurements +of each isoform across replicates. As the proportions of isoforms are inter-dependent and add up to 100%, the coloured lines connect +the isoform abundances measured in each replicate. By default, the boxes' fill-colour encodes the transcript level DTU prediction for +each isoform: In this example, isoforms `.c1` and `.c2` change significantly, isoforms `.c3`, `.c4` and `.nc` do not change significantly +and isoform `.d` could not be tested (for very narrow boxes, the fill colour is difficult to see). -```{r} -# Split by condition. -plot_gene(mydtu, "MIX6", style="points") -``` +This structure is great for seeing changes in the isoform distribution profile of a gene. A cleaner versions of the plot can also be obtained: ```{r} -# Paired by isoform. -plot_gene(mydtu, "MIX6", style="pairedpnt") +# Grouping by condition (minimalist): +plot_gene(mydtu, "MIX6", style="linesonly") ``` -Finally, relative abundance measurements are not independent. The measurements from each replicate are tied together. -Connecting the measurements of each replicate reveals the level of consistency of the relative abundances across the replicates. - -```{r} -# Split by condition. -# This is the DEFAULT view if the style is omitted, as it is the most informative. -plot_gene(mydtu, "MIX6", style="lines") -``` +When there are many lines or many isoforms, it may be preferable to group abundances by isoform, making individual comparisons easier: ```{r} -# A cleaner version, although it no longer shows which isoforms are DTU. -plot_gene(mydtu, "MIX6", style="linesonly") +# Grouping by isoform: +plot_gene(mydtu, "MIX6", style="byisoform") ``` ### Customisation of the gene plot -There are options to change the colour coding of the gene plot. Bear in mind that, some -of the options are not available for all styles. If neither `fillby` and `colourby` is -set to `"DTU"`, and `shapeby` is undefined, the role of encoding DTU is automatically -assigned to `shapeby`. +#### Change information layers + +The `fillby`, `colourby` and `shapeby` parameters can be respectively used to control which information layers +are encoded as fill, line/point colour, and point shape. Possible values are `c("isoform", "condition", "DTU", "none", "replicate")`. +Not all options are available in all styles, in which case they will be silently ignored. ```{r} -# You can change the information that is colour-coded. -plot_gene(mydtu, "MIX6", style="plain", fillby="DTU") -plot_gene(mydtu, "MIX6", style="points", fillby="isoform", colourby="replicate") -plot_gene(mydtu, "MIX6", style="pairedpnt", colourby="isoform", shapeby="replicate") +# Change the encoded information. +plot_gene(mydtu, "MIX6", style="bycondition", fillby="isoform") +plot_gene(mydtu, "MIX6", style="byisoform", colourby="DTU", shapeby="replicate") -# For a less colourful look, the layered information can be disabled. -plot_gene(mydtu, "MIX6", style="points", fillby="none", colourby="none", shapeby="none") +# For a less busy look, any of the information layers can be disabled. +plot_gene(mydtu, "MIX6", style="byisoform", colourby="none", shapeby="none") ``` +#### Change colour code + ```{r} -# You can also customise the colours used by specifying new values for +# Colour codes can be customised by specifying new values for # condcolvec, replcolvec, isofcolvec, dtucolvec and nonecol. -plot_gene(mydtu, "MIX6", style="lines", fillby="condition", condcolvec=c("magenta", "cyan")) +plot_gene(mydtu, "MIX6", style="bycondition", fillby="condition", condcolvec=c("magenta", "cyan")) ``` - - ## Plots of overall run Our simulated dataset is too small to properly demonstrate what these plots typically look like. So each one is accompanied by a static image of the same plot created with a real and much larger dataset. -Several of these plots are **likely to display warnings** about missing or non-finite values. These are due to the -presence of `NA` in the tables, where entries did not pass the thresholds, and can be ignored. - Possibly the most common plot in differential expression is the volcano plot, which plots the effect size against the statistical significance. As it is difficult to define a single p-value and a single effect size at the gene level, the volcano can only be plotted at the transcript level. @@ -141,7 +132,7 @@ plot_overview(mydtu, type="volcano") ``` This is what it looks like on a larger dataset: -![Dprop VS sig](./fig/volcano.jpg) +![Dprop VS sig](./fig/volcano.png) The next command plots the largest change in proportion seen within each gene, against the number of genes showing such change. This is a way to inspect what effect sizes are present in the data. As an additional layer of information, @@ -153,7 +144,7 @@ plot_overview(mydtu, type="maxdprop") ``` This is what it looks like on a larger dataset: -![Max Dprop](./fig/maxdprop.jpg) +![Max Dprop](./fig/maxdprop.png) ### Interactive plots diff --git a/inst/doc/plots.html b/inst/doc/plots.html index cede938..4a622bb 100644 --- a/inst/doc/plots.html +++ b/inst/doc/plots.html @@ -225,104 +225,90 @@

    Visualisation of results

    -

    The output object's tables provide a host of information. But a good plot is worth a thousand numbers.

    +

    The RATs output object provides a host of information and we encourage users to familiarize themselves with it. +But a good plot is worth a thousand numbers.

    Isoform abundances for a given gene

    This function allows you to visualise what's going on in any particular gene. Both the absolute counts and the relative proportions are plotted for each transcript. This is a very useful function for inspecting a gene of interest. It enables quick visual evaluation of the dispersion of the replicate measurements, the magnitude of the proportion change, the -presence of outliers, and the consistency among the replicates. There are several styles for this plot, depending on your preferences.

    +presence of outliers, and the consistency among the replicates.

    -

    The simplest option is to represent the measurements from your replicates as boxplots:

    +

    By default, these plots can be quite colourful (and possibly ugly) as their aim is to highlight which isoforms are predicted to be DTU. +Options are provided to change which information layers are encoded and what colours are used.

    -
    # Split by condition for easier view of the overall gene profile.
    -plot_gene(mydtu, "MIX6", style="plain")
    -
    - -

    plot of chunk unnamed-chunk-2

    +

    There are several styles for this plot, depending on your preferences. The default plot format includes the most information:

    -
    # Isoforms paired up for easier individual comparisons.
    -plot_gene(mydtu, "MIX6", style="paired")
    +
    # Grouping by condition (DEAFULT):
    +#   plot_gene(mydtu, "MIX6")
    +plot_gene(mydtu, "MIX6", style="bycondition")
     
    -

    plot of chunk unnamed-chunk-3

    +

    plot of chunk unnamed-chunk-2

    -

    Boxplots, however, can hide the real distribution of your data, which could be multi-modal. Adding the actual measurements -resolves that:

    +

    The top two facets show the absolute abundances (counts) of the isoforms, the bottom two show the relative abundances (proportions). +The left two facets refer to one condition, the right two ones to the other condition. The boxplots describe the abundance measurements +of each isoform across replicates. As the proportions of isoforms are inter-dependent and add up to 100%, the coloured lines connect +the isoform abundances measured in each replicate. By default, the boxes' fill-colour encodes the transcript level DTU prediction for +each isoform: In this example, isoforms .c1 and .c2 change significantly, isoforms .c3, .c4 and .nc do not change significantly +and isoform .d could not be tested (for very narrow boxes, the fill colour is difficult to see).

    -
    # Split by condition.
    -plot_gene(mydtu, "MIX6", style="points")
    -
    +

    This structure is great for seeing changes in the isoform distribution profile of a gene. A cleaner versions of the plot can also be obtained:

    -

    plot of chunk unnamed-chunk-4

    - -
    # Paired by isoform.
    -plot_gene(mydtu, "MIX6", style="pairedpnt")
    +
    # Grouping by condition (minimalist):
    +plot_gene(mydtu, "MIX6", style="linesonly")
     
    -

    plot of chunk unnamed-chunk-5

    +

    plot of chunk unnamed-chunk-3

    -

    Finally, relative abundance measurements are not independent. The measurements from each replicate are tied together. -Connecting the measurements of each replicate reveals the level of consistency of the relative abundances across the replicates.

    +

    When there are many lines or many isoforms, it may be preferable to group abundances by isoform, making individual comparisons easier:

    -
    # Split by condition.
    -# This is the DEFAULT view if the style is omitted, as it is the most informative.
    -plot_gene(mydtu, "MIX6", style="lines")
    +
    # Grouping by isoform:
    +plot_gene(mydtu, "MIX6", style="byisoform")
     
    -

    plot of chunk unnamed-chunk-6

    - -
    # A cleaner version, although it no longer shows which isoforms are DTU.
    -plot_gene(mydtu, "MIX6", style="linesonly")
    -
    - -

    plot of chunk unnamed-chunk-7

    +

    plot of chunk unnamed-chunk-4

    Customisation of the gene plot

    -

    There are options to change the colour coding of the gene plot. Bear in mind that, some -of the options are not available for all styles. If neither fillby and colourby is -set to "DTU", and shapeby is undefined, the role of encoding DTU is automatically -assigned to shapeby.

    +

    Change information layers

    -
    # You can change the information that is colour-coded.
    -plot_gene(mydtu, "MIX6", style="plain", fillby="DTU")
    -
    +

    The fillby, colourby and shapeby parameters can be respectively used to control which information layers +are encoded as fill, line/point colour, and point shape. Possible values are c("isoform", "condition", "DTU", "none", "replicate"). +Not all options are available in all styles, in which case they will be silently ignored.

    -

    plot of chunk unnamed-chunk-8

    - -
    plot_gene(mydtu, "MIX6", style="points", fillby="isoform", colourby="replicate")
    +
    # Change the encoded information.
    +plot_gene(mydtu, "MIX6", style="bycondition", fillby="isoform")
     
    -

    plot of chunk unnamed-chunk-8

    +

    plot of chunk unnamed-chunk-5

    -
    plot_gene(mydtu, "MIX6", style="pairedpnt", colourby="isoform", shapeby="replicate")
    +
    plot_gene(mydtu, "MIX6", style="byisoform", colourby="DTU", shapeby="replicate")
     
    -

    plot of chunk unnamed-chunk-8

    +

    plot of chunk unnamed-chunk-5

    -
    # For a less colourful look, the layered information can be disabled.
    -plot_gene(mydtu, "MIX6", style="points", fillby="none", colourby="none", shapeby="none")
    +
    # For a less busy look, any of the information layers can be disabled.
    +plot_gene(mydtu, "MIX6", style="byisoform", colourby="none", shapeby="none")
     
    -

    plot of chunk unnamed-chunk-8

    +

    plot of chunk unnamed-chunk-5

    + +

    Change colour code

    -
    # You can also customise the colours used by specifying new values for
    +
    # Colour codes can be customised by specifying new values for
     # condcolvec, replcolvec, isofcolvec, dtucolvec and nonecol.
    -plot_gene(mydtu, "MIX6", style="lines", fillby="condition", condcolvec=c("magenta", "cyan"))
    +plot_gene(mydtu, "MIX6", style="bycondition", fillby="condition", condcolvec=c("magenta", "cyan"))
     
    -

    plot of chunk unnamed-chunk-9

    +

    plot of chunk unnamed-chunk-6

    Plots of overall run

    Our simulated dataset is too small to properly demonstrate what these plots typically look like. So each one is accompanied by a static image of the same plot created with a real and much larger dataset.

    -

    Several of these plots are likely to display warnings about missing or non-finite values. These are due to the -presence of NA in the tables, where entries did not pass the thresholds, and can be ignored.

    -

    Possibly the most common plot in differential expression is the volcano plot, which plots the effect size against the statistical significance. As it is difficult to define a single p-value and a single effect size at the gene level, the volcano can only be plotted at the transcript level.

    @@ -331,13 +317,10 @@

    Plots of overall run

    plot_overview(mydtu, type="volcano")
    -
    ## Warning: Removed 11 rows containing missing values (geom_point).
    -
    - -

    plot of chunk unnamed-chunk-10

    +

    plot of chunk unnamed-chunk-7

    This is what it looks like on a larger dataset: -Dprop VS sig

    +Dprop VS sig

    The next command plots the largest change in proportion seen within each gene, against the number of genes showing such change. This is a way to inspect what effect sizes are present in the data. As an additional layer of information, @@ -347,10 +330,10 @@

    Plots of overall run

    plot_overview(mydtu, type="maxdprop")
    -

    plot of chunk unnamed-chunk-11

    +

    plot of chunk unnamed-chunk-8

    This is what it looks like on a larger dataset: -Max Dprop

    +Max Dprop

    Interactive plots

    @@ -385,20 +368,14 @@

    Plot customisation

    myplot # display
    -
    ## Warning: Removed 11 rows containing missing values (geom_point).
    -
    - -

    plot of chunk unnamed-chunk-13

    +

    plot of chunk unnamed-chunk-10

    # Change title. 
     myplot2 <- myplot + ggtitle("MY EPIC TITLE")
     myplot2
     
    -
    ## Warning: Removed 11 rows containing missing values (geom_point).
    -
    - -

    plot of chunk unnamed-chunk-13

    +

    plot of chunk unnamed-chunk-10


    diff --git a/man/plot_gene.Rd b/man/plot_gene.Rd index 9d42ace..6a747f2 100644 --- a/man/plot_gene.Rd +++ b/man/plot_gene.Rd @@ -17,11 +17,8 @@ plot_gene(dtuo, pid, style = "lines", fillby = NA_character_, \item{pid}{A \code{parent_id} to make the plot for.} \item{style}{Different themes: \itemize{ -\item{"plain" - Grouped by condition.}, -\item{"paired" - Grouped by isoform.}, -\item{"points" - Grouped by condition. Show individual measurements as points.}, -\item{"pairedpnt" - Grouped by isoform. Show individual measurements as points.}, -\item{"lines" - (Default) Grouped by condition. Connect individual measurements with colour-coded lines.} +\item{"byisoform" - Grouped by isoform. Show individual measurements as points.}, +\item{"bycondition" - (Default) Grouped by condition. Connect individual measurements with colour-coded lines.} \item{"linesonly" - Grouped by condition. Connect replicate measurements as colour-coded lines. Hide the boxplots.} }} diff --git a/man/plot_overview.Rd b/man/plot_overview.Rd index 5e5d14c..6bd8f08 100644 --- a/man/plot_overview.Rd +++ b/man/plot_overview.Rd @@ -10,16 +10,11 @@ plot_overview(dtuo, type = "volcano") \item{dtuo}{A DTU object.} \item{type}{Type of plot. \itemize{ - \item{"volcano"}{Change in proportion VS. statistical significance. Done at the transcript level. (Default)} - \item{"maxdprop"}{Distribution of biggest change in proportion in each gene.} - \item{"transc_quant"}{Transcript-level quantification reproducibility threshold VS. number of DTU positive calls.} - \item{"gene_quant"}{Gene-level quantification reproducibility threshold VS. number of DTU positive calls.} - \item{"transc_rep"}{Transcript-level replication reproducibility threshold VS. number of DTU positive calls.} - \item{"gene_rep"}{Gene-level replication reproducibility threshold VS. number of DTU positive calls.} -}} +\item{"volcano"}{Change in proportion VS. statistical significance. Done at the transcript level. (Default)} +\item{"maxdprop"}{Distribution of biggest change in proportion in each gene.} }} } \value{ -a ggplot2 object. Simply display it or you can also customize it. +A ggplot2 object. Simply display it or you can also customize it. Generally uses the results of the transcript-level proportion tests. } diff --git a/tests/testthat/test_2_interface.R b/tests/testthat/test_2_interface.R index 21bcce2..861c61b 100644 --- a/tests/testthat/test_2_interface.R +++ b/tests/testthat/test_2_interface.R @@ -465,15 +465,52 @@ test_that("The summaries work", { }) #============================================================================== -test_that("The plotting commands work", { +test_that("The gene plotting commands work", { sim <- sim_sleuth_data(cnames=c("ONE","TWO")) mydtu <- call_DTU(annot= sim$annot, slo= sim$slo, name_A= "ONE", name_B= "TWO", qbootnum=2, verbose = FALSE) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6")) expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="lines")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="bycondition")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="merged")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="linesonly")) + + expect_error(plot_gene(dtuo=mydtu, pid="MIX6", fillby="replicate")) + expect_error(plot_gene(dtuo=mydtu, pid="MIX6", style="bycondition", fillby="garbage")) + expect_error(plot_gene(dtuo=mydtu, pid="MIX6", style="bycondition", colourby="garbage")) + expect_error(plot_gene(dtuo=mydtu, pid="MIX6", style="bycondition", shapeby="garbage")) + expect_error(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", fillby="none", colourby="none")) + + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="bycondition", fillby="none")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="bycondition", fillby="isoform")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="bycondition", fillby="condition")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="bycondition", fillby="DTU")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="bycondition", colourby="replicate")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="bycondition", shapeby="replicate")) + + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", fillby="isoform")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", fillby="DTU")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", fillby="none")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", fillby="condition")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", colourby="isoform")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", colourby="condition")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", colourby="DTU")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", colourby="replicate")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", colourby="none")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", shapeby="isoform")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", shapeby="condition")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", shapeby="DTU")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", shapeby="replicate")) + expect_silent(plot_gene(dtuo=mydtu, pid="MIX6", style="byisoform", shapeby="none")) +}) + +#============================================================================== +test_that("The gene plotting commands work", { + sim <- sim_sleuth_data(cnames=c("ONE","TWO")) + mydtu <- call_DTU(annot= sim$annot, slo= sim$slo, name_A= "ONE", name_B= "TWO", qbootnum=2, verbose = FALSE) + expect_silent(plot_overview(mydtu)) + expect_silent(plot_overview(dtuo=mydtu, type="volcano")) expect_silent(plot_overview(dtuo=mydtu, type="maxdprop")) - expect_silent(plot_overview(mydtu, "transc_quant")) - expect_silent(plot_overview(mydtu, "gene_quant")) - expect_silent(plot_overview(mydtu, "transc_rep")) - expect_silent(plot_overview(mydtu, "gene_rep")) }) diff --git a/vignettes/fig/maxdprop.jpg b/vignettes/fig/maxdprop.jpg deleted file mode 100644 index e3fd7b6..0000000 Binary files a/vignettes/fig/maxdprop.jpg and /dev/null differ diff --git a/vignettes/fig/maxdprop.png b/vignettes/fig/maxdprop.png new file mode 100644 index 0000000..3ab2b3f Binary files /dev/null and b/vignettes/fig/maxdprop.png differ diff --git a/vignettes/fig/volcano.jpg b/vignettes/fig/volcano.jpg deleted file mode 100644 index 8ccffb6..0000000 Binary files a/vignettes/fig/volcano.jpg and /dev/null differ diff --git a/vignettes/fig/volcano.png b/vignettes/fig/volcano.png new file mode 100644 index 0000000..aaf9616 Binary files /dev/null and b/vignettes/fig/volcano.png differ diff --git a/vignettes/input.Rmd b/vignettes/input.Rmd index d42ceae..d6f2594 100644 --- a/vignettes/input.Rmd +++ b/vignettes/input.Rmd @@ -9,7 +9,7 @@ output: theme: readable toc: yes vignette: > - %\VignetteIndexEntry{RATs Input & Settings} + %\VignetteIndexEntry{RATs 2: Input & Settings} %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} diff --git a/vignettes/intro.Rmd b/vignettes/intro.Rmd index 5f2ade4..a9a42bc 100644 --- a/vignettes/intro.Rmd +++ b/vignettes/intro.Rmd @@ -9,7 +9,7 @@ output: theme: readable toc: yes vignette: > - %\VignetteIndexEntry{RATs Quick start} + %\VignetteIndexEntry{RATs 1: Quick start} %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} --- diff --git a/vignettes/output.Rmd b/vignettes/output.Rmd index f6d3e85..2f2da1a 100644 --- a/vignettes/output.Rmd +++ b/vignettes/output.Rmd @@ -9,7 +9,7 @@ output: theme: readable toc: yes vignette: > - %\VignetteIndexEntry{RATs Output and Plots} + %\VignetteIndexEntry{RATs 3: Raw Output} %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} @@ -75,7 +75,7 @@ As of `v0.4.2` it uses the same category names as `dtu_summury()` for consistenc ids <- get_dtu_ids(mydtu) # Contents -print( names(ids) ) +print( ids ) # DTU positive genes according to the transcript-level test. print( ids[[4]] ) @@ -244,6 +244,10 @@ print( names(mydtu$Abundances) ) 1. `condA` - (num) The transcript abundances in the first condition. 2. `condB` - (num) The transcript abundances in the second condition. +```{r} +# Abundance table for first condition. +print( head(mydtu$Abundances[[1]]) ) +``` *** diff --git a/vignettes/plots.Rmd b/vignettes/plots.Rmd index b3c9c28..e231c08 100644 --- a/vignettes/plots.Rmd +++ b/vignettes/plots.Rmd @@ -9,7 +9,7 @@ output: theme: readable toc: yes vignette: > - %\VignetteIndexEntry{RATs Output and Plots} + %\VignetteIndexEntry{RATs 4: Plots} %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} @@ -47,7 +47,8 @@ mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", name_B = "p # Visualisation of results -The output object's tables provide a host of information. But a good plot is worth a thousand numbers. +The RATs output object provides a host of information and we encourage users to familiarize themselves with it. +But a good plot is worth a thousand numbers. ## Isoform abundances for a given gene @@ -55,82 +56,72 @@ The output object's tables provide a host of information. But a good plot is wor This function allows you to visualise what's going on in any particular gene. Both the absolute counts and the relative proportions are plotted for each transcript. This is a very useful function for inspecting a gene of interest. It enables quick visual evaluation of the dispersion of the replicate measurements, the magnitude of the proportion change, the -presence of outliers, and the consistency among the replicates. There are several styles for this plot, depending on your preferences. +presence of outliers, and the consistency among the replicates. -The simplest option is to represent the measurements from your replicates as boxplots: +By default, these plots can be quite colourful (and possibly ugly) as their aim is to highlight which isoforms are predicted to be DTU. +Options are provided to change which information layers are encoded and what colours are used. -```{r} -# Split by condition for easier view of the overall gene profile. -plot_gene(mydtu, "MIX6", style="plain") -``` +There are several styles for this plot, depending on your preferences. The default plot format includes the most information: ```{r} -# Isoforms paired up for easier individual comparisons. -plot_gene(mydtu, "MIX6", style="paired") +# Grouping by condition (DEAFULT): +# plot_gene(mydtu, "MIX6") +plot_gene(mydtu, "MIX6", style="bycondition") ``` -Boxplots, however, can hide the real distribution of your data, which could be multi-modal. Adding the actual measurements -resolves that: +The top two facets show the absolute abundances (counts) of the isoforms, the bottom two show the relative abundances (proportions). +The left two facets refer to one condition, the right two ones to the other condition. The boxplots describe the abundance measurements +of each isoform across replicates. As the proportions of isoforms are inter-dependent and add up to 100%, the coloured lines connect +the isoform abundances measured in each replicate. By default, the boxes' fill-colour encodes the transcript level DTU prediction for +each isoform: In this example, isoforms `.c1` and `.c2` change significantly, isoforms `.c3`, `.c4` and `.nc` do not change significantly +and isoform `.d` could not be tested (for very narrow boxes, the fill colour is difficult to see). -```{r} -# Split by condition. -plot_gene(mydtu, "MIX6", style="points") -``` +This structure is great for seeing changes in the isoform distribution profile of a gene. A cleaner versions of the plot can also be obtained: ```{r} -# Paired by isoform. -plot_gene(mydtu, "MIX6", style="pairedpnt") +# Grouping by condition (minimalist): +plot_gene(mydtu, "MIX6", style="linesonly") ``` -Finally, relative abundance measurements are not independent. The measurements from each replicate are tied together. -Connecting the measurements of each replicate reveals the level of consistency of the relative abundances across the replicates. - -```{r} -# Split by condition. -# This is the DEFAULT view if the style is omitted, as it is the most informative. -plot_gene(mydtu, "MIX6", style="lines") -``` +When there are many lines or many isoforms, it may be preferable to group abundances by isoform, making individual comparisons easier: ```{r} -# A cleaner version, although it no longer shows which isoforms are DTU. -plot_gene(mydtu, "MIX6", style="linesonly") +# Grouping by isoform: +plot_gene(mydtu, "MIX6", style="byisoform") ``` ### Customisation of the gene plot -There are options to change the colour coding of the gene plot. Bear in mind that, some -of the options are not available for all styles. If neither `fillby` and `colourby` is -set to `"DTU"`, and `shapeby` is undefined, the role of encoding DTU is automatically -assigned to `shapeby`. +#### Change information layers + +The `fillby`, `colourby` and `shapeby` parameters can be respectively used to control which information layers +are encoded as fill, line/point colour, and point shape. Possible values are `c("isoform", "condition", "DTU", "none", "replicate")`. +Not all options are available in all styles, in which case they will be silently ignored. ```{r} -# You can change the information that is colour-coded. -plot_gene(mydtu, "MIX6", style="plain", fillby="DTU") -plot_gene(mydtu, "MIX6", style="points", fillby="isoform", colourby="replicate") -plot_gene(mydtu, "MIX6", style="pairedpnt", colourby="isoform", shapeby="replicate") +# Change the encoded information. +plot_gene(mydtu, "MIX6", style="bycondition", fillby="isoform") +plot_gene(mydtu, "MIX6", style="byisoform", colourby="DTU", shapeby="replicate") -# For a less colourful look, the layered information can be disabled. -plot_gene(mydtu, "MIX6", style="points", fillby="none", colourby="none", shapeby="none") +# For a less busy look, any of the information layers can be disabled. +plot_gene(mydtu, "MIX6", style="byisoform", colourby="none", shapeby="none") ``` +#### Change colour code + ```{r} -# You can also customise the colours used by specifying new values for +# Colour codes can be customised by specifying new values for # condcolvec, replcolvec, isofcolvec, dtucolvec and nonecol. -plot_gene(mydtu, "MIX6", style="lines", fillby="condition", condcolvec=c("magenta", "cyan")) +plot_gene(mydtu, "MIX6", style="bycondition", fillby="condition", condcolvec=c("magenta", "cyan")) ``` - - ## Plots of overall run Our simulated dataset is too small to properly demonstrate what these plots typically look like. So each one is accompanied by a static image of the same plot created with a real and much larger dataset. -Several of these plots are **likely to display warnings** about missing or non-finite values. These are due to the -presence of `NA` in the tables, where entries did not pass the thresholds, and can be ignored. - Possibly the most common plot in differential expression is the volcano plot, which plots the effect size against the statistical significance. As it is difficult to define a single p-value and a single effect size at the gene level, the volcano can only be plotted at the transcript level. @@ -141,7 +132,7 @@ plot_overview(mydtu, type="volcano") ``` This is what it looks like on a larger dataset: -![Dprop VS sig](./fig/volcano.jpg) +![Dprop VS sig](./fig/volcano.png) The next command plots the largest change in proportion seen within each gene, against the number of genes showing such change. This is a way to inspect what effect sizes are present in the data. As an additional layer of information, @@ -153,7 +144,7 @@ plot_overview(mydtu, type="maxdprop") ``` This is what it looks like on a larger dataset: -![Max Dprop](./fig/maxdprop.jpg) +![Max Dprop](./fig/maxdprop.png) ### Interactive plots