diff --git a/inst/doc/tutorial.R b/inst/doc/tutorial.R index 6f5a3a4..a9088dd 100644 --- a/inst/doc/tutorial.R +++ b/inst/doc/tutorial.R @@ -149,13 +149,13 @@ plot_gene(mydtu, "MIX6", vals="proportions") # The ERROR BARS represent 2 standard deviations from the mean count across replicates. plot_gene(mydtu, "MIX6", vals="counts") -## ------------------------------------------------------------------------ -# Proportion change VS significance. -plot_overview(mydtu, type="volcano") +## ----eval=FALSE---------------------------------------------------------- +# # Proportion change VS significance. +# plot_overview(mydtu, type="volcano") -## ------------------------------------------------------------------------ -# Distribution of maximum proportion change. -plot_overview(mydtu, type="maxdprop") +## ----eval=FALSE---------------------------------------------------------- +# # Distribution of maximum proportion change. +# plot_overview(mydtu, type="maxdprop") ## ------------------------------------------------------------------------ library(ggplot2) @@ -163,8 +163,8 @@ library(ggplot2) myplot <- plot_overview(mydtu, "volcano") myplot # display -# Change scale of y axis to linear. -myplot2 <- myplot + scale_y_continuous(trans = "identity") +# Change title. +myplot2 <- myplot + ggtitle("My epic title") myplot2 ## ----eval=FALSE---------------------------------------------------------- diff --git a/inst/doc/tutorial.Rmd b/inst/doc/tutorial.Rmd index 2ead84d..bbfda1f 100644 --- a/inst/doc/tutorial.Rmd +++ b/inst/doc/tutorial.Rmd @@ -591,24 +591,24 @@ Possibly the most common plot in differential expression is the volcano plot, wh the statistical significance. As it is difficult to define a single p-value and a single effect size at the gene level, the volcano can only be plotted at the transcript level. -```{r} +```{r eval=FALSE} # Proportion change VS significance. plot_overview(mydtu, type="volcano") ``` -And this is what it looks like on a larger dataset: +This is what it looks like on a larger dataset: ![Dprop VS sig](./fig/volcano.jpg) The next command plots the largest change in proportion seen within each gene, against the number of genes showing such change. This is a way to inspect what effect sizes are present in the data. As an additional layer of information, they are colour-coded by their DTU call. -```{r} +```{r eval=FALSE} # Distribution of maximum proportion change. plot_overview(mydtu, type="maxdprop") ``` -And this is what it looks like on a larger dataset: +This is what it looks like on a larger dataset: ![Max Dprop](./fig/maxdprop.jpg) @@ -624,8 +624,8 @@ library(ggplot2) myplot <- plot_overview(mydtu, "volcano") myplot # display -# Change scale of y axis to linear. -myplot2 <- myplot + scale_y_continuous(trans = "identity") +# Change title. +myplot2 <- myplot + ggtitle("My epic title") myplot2 ``` @@ -714,8 +714,7 @@ mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", As measure of confidence in the DTU calls, we take the fraction of iterations that returned a positive DTU call for the given gene/transcript. A positive call is considered confident if the fraction of positive calls exceeds the threshold (`>= conf_thresh`). A negative call is considered confident if the fraction of positive calls is below the complement of the threshold (`<= 1 - conf_thresh`). - -The default confidence threshold is somewhat strict, but can be over-ridden. +The default confidence threshold can be over-ridden. ```{r eval=FALSE} mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", diff --git a/inst/doc/tutorial.html b/inst/doc/tutorial.html index 07d8c0d..04be228 100644 --- a/inst/doc/tutorial.html +++ b/inst/doc/tutorial.html @@ -577,7 +577,7 @@

Example results

## [1] "Using a simulated sleuth object for the purposes of the tutorial.\n Simulated using built-in functionality of `rats`." ## ## $rats_version -## [1] '0.2.0' +## [1] '0.2.1' ## ## $R_version ## $R_version$platform @@ -657,22 +657,22 @@

Example results

## 3: NA NA NA NA NA ## 4: NA NA NA NA NA ## 5: NA NA NA NA NA -## 6: 0.73 FALSE 0.003622041 0.001149198 0.004390658 +## 6: 0.84 FALSE 0.002214194 0.0006600624 0.003227884 ## 7: NA NA NA NA NA -## 8: 1.00 TRUE 0.000000000 0.000000000 0.000000000 +## 8: 1.00 TRUE 0.000000000 0.0000000000 0.000000000 ## 9: NA NA NA NA NA -## 10: 0.00 TRUE 0.788718175 0.712346523 0.139150241 +## 10: 0.00 TRUE 0.786287881 0.7068863918 0.142005870 ## boot_p_stdevBA boot_p_minAB boot_p_minBA boot_p_maxAB boot_p_maxBA ## 1: NA NA NA NA NA ## 2: NA NA NA NA NA ## 3: NA NA NA NA NA ## 4: NA NA NA NA NA ## 5: NA NA NA NA NA -## 6: 0.001471376 5.451397e-05 1.550451e-05 0.0154553 0.00480727 +## 6: 0.00109766 5.030128e-05 1.550451e-05 0.0154553 0.00480727 ## 7: NA NA NA NA NA -## 8: 0.000000000 0.000000e+00 0.000000e+00 0.0000000 0.00000000 +## 8: 0.00000000 0.000000e+00 0.000000e+00 0.0000000 0.00000000 ## 9: NA NA NA NA NA -## 10: 0.191607688 5.143483e-01 3.311102e-01 0.9915083 0.98871978 +## 10: 0.19490033 5.143483e-01 3.311102e-01 0.9915083 0.98871978 ## boot_na ## 1: NA ## 2: NA @@ -778,19 +778,19 @@

Example results

## 6: NA NA NA NA NA NA ## 7: NA NA NA NA NA NA ## 8: NA NA NA NA NA NA -## 9: TRUE 0.58 FALSE 4.345906e-02 3.114916e-02 6.605471e-03 -## 10: TRUE 0.58 FALSE 4.345906e-02 3.114916e-02 6.605471e-03 +## 9: TRUE 0.77 FALSE 3.195747e-02 2.644584e-02 6.605471e-03 +## 10: TRUE 0.77 FALSE 3.195747e-02 2.644584e-02 6.605471e-03 ## 11: NA NA NA NA NA NA -## 12: FALSE 0.07 FALSE 6.066062e-01 3.508156e-01 1.380439e-02 -## 13: TRUE 1.00 TRUE 1.157590e-78 2.740787e-78 5.067939e-84 -## 14: TRUE 1.00 TRUE 5.396175e-49 1.362972e-48 6.413781e-53 -## 15: TRUE 0.00 TRUE 2.400417e-21 3.058516e-21 1.066067e-24 -## 16: TRUE 1.00 TRUE 1.198507e-44 1.836449e-44 1.516132e-46 +## 12: FALSE 0.10 FALSE 6.349085e-01 3.659691e-01 1.380439e-02 +## 13: TRUE 1.00 TRUE 1.663636e-78 4.008386e-78 5.067939e-84 +## 14: TRUE 1.00 TRUE 6.206525e-49 1.506499e-48 6.413781e-53 +## 15: TRUE 0.00 TRUE 2.148586e-21 3.164993e-21 1.066067e-24 +## 16: TRUE 1.00 TRUE 1.358285e-44 1.879607e-44 1.516132e-46 ## 17: NA NA NA NA NA NA -## 18: FALSE 0.00 TRUE 6.866219e-01 1.656446e-01 3.567434e-01 +## 18: FALSE 0.00 TRUE 7.328544e-01 1.542445e-01 4.212087e-01 ## 19: NA NA NA NA NA NA -## 20: FALSE 0.00 TRUE 9.481676e-01 7.258719e-02 7.948790e-01 -## 21: FALSE 0.00 TRUE 9.481676e-01 7.258719e-02 7.948790e-01 +## 20: FALSE 0.00 TRUE 9.520456e-01 6.717831e-02 7.948790e-01 +## 21: FALSE 0.00 TRUE 9.520456e-01 6.717831e-02 7.948790e-01 ## sig boot_dtu_freq conf boot_p_mean boot_p_stdev boot_p_min ## boot_p_max boot_na ## 1: NA NA @@ -848,14 +848,11 @@

Plots of overall run

Possibly the most common plot in differential expression is the volcano plot, which plots the effect size against the statistical significance. As it is difficult to define a single p-value and a single effect size at the gene level, the volcano can only be plotted at the transcript level.

# Proportion change VS significance.
 plot_overview(mydtu, type="volcano")
-
## Warning: Removed 11 rows containing missing values (geom_point).
-

-

And this is what it looks like on a larger dataset: Dprop VS sig

+

This is what it looks like on a larger dataset: Dprop VS sig

The next command plots the largest change in proportion seen within each gene, against the number of genes showing such change. This is a way to inspect what effect sizes are present in the data. As an additional layer of information, they are colour-coded by their DTU call.

# Distribution of maximum proportion change.
 plot_overview(mydtu, type="maxdprop")
-

-

And this is what it looks like on a larger dataset: Max Dprop

+

This is what it looks like on a larger dataset: Max Dprop

Plot customisation

@@ -865,14 +862,12 @@

Plot customisation

myplot <- plot_overview(mydtu, "volcano") myplot # display
## Warning: Removed 11 rows containing missing values (geom_point).
-

-
# Change scale of y axis to linear. 
-myplot2 <- myplot + scale_y_continuous(trans = "identity")
-
## Scale for 'y' is already present. Adding another scale for 'y', which
-## will replace the existing scale.
-
myplot2
+

+
# Change title. 
+myplot2 <- myplot + ggtitle("My epic title")
+myplot2
## Warning: Removed 11 rows containing missing values (geom_point).
-

+

@@ -922,8 +917,7 @@

Bootstrapping & Confidence in DTU calls

# Skip bootstraps. mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", name_B = "patients", boots = "none") -

As measure of confidence in the DTU calls, we take the fraction of iterations that returned a positive DTU call for the given gene/transcript. A positive call is considered confident if the fraction of positive calls exceeds the threshold (>= conf_thresh). A negative call is considered confident if the fraction of positive calls is below the complement of the threshold (<= 1 - conf_thresh).

-

The default confidence threshold is somewhat strict, but can be over-ridden.

+

As measure of confidence in the DTU calls, we take the fraction of iterations that returned a positive DTU call for the given gene/transcript. A positive call is considered confident if the fraction of positive calls exceeds the threshold (>= conf_thresh). A negative call is considered confident if the fraction of positive calls is below the complement of the threshold (<= 1 - conf_thresh). The default confidence threshold can be over-ridden.

mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", 
                   name_B = "patients", conf_thresh = 0.9)

In order to visualise the effect of the confidence threshold, the number of positive DTU calls can be plotted against the threshold.

diff --git a/vignettes/.build.timestamp b/vignettes/.build.timestamp deleted file mode 100644 index e69de29..0000000 diff --git a/vignettes/fig/maxdprop.jpg b/vignettes/fig/maxdprop.jpg new file mode 100644 index 0000000..e3fd7b6 Binary files /dev/null and b/vignettes/fig/maxdprop.jpg differ diff --git a/vignettes/fig/maxdprop.png b/vignettes/fig/maxdprop.png deleted file mode 100644 index 9f4a452..0000000 Binary files a/vignettes/fig/maxdprop.png and /dev/null differ diff --git a/vignettes/fig/volcano.jpg b/vignettes/fig/volcano.jpg new file mode 100644 index 0000000..8ccffb6 Binary files /dev/null and b/vignettes/fig/volcano.jpg differ diff --git a/vignettes/fig/volcano.png b/vignettes/fig/volcano.png deleted file mode 100644 index 3449c06..0000000 Binary files a/vignettes/fig/volcano.png and /dev/null differ diff --git a/vignettes/tutorial.R b/vignettes/tutorial.R deleted file mode 100644 index 6f5a3a4..0000000 --- a/vignettes/tutorial.R +++ /dev/null @@ -1,241 +0,0 @@ -## ----eval=FALSE---------------------------------------------------------- -# # 1. Build latest developmental version from Github: -# devtools::install_github("bartongroup/rats") -# -# # 2. Load into R session. -# library{rats} -# -# # 3. Specify transcript grouping: -# my_identifiers_table <- annot2ids("my_annotation.gtf") -# -# # 4a. Call DTU on a sleuth object, using default settings: -# mydtu <- call_DTU(annot= my_identifiers_table, slo= my_sleuth_object, -# name_A= "My_condition", name_B= "My_other_condition") -# # 4b. Call DTU on generic bootstrapped abundance estimates: -# mydtu <- call_DTU(annot= my_identifiers_table, boot_data_A= my_list_data_tables_A, -# boot_data_B= my_list_data_tables_A) -# # 4c. Call DTU on generic abundance estimates: -# mydtu <- call_DTU(annot= my_identifiers_table, count_data_A= my_data_table_A, -# count_data_B= my_data_table_B, boots= "none") -# -# # 5. Get all gene and transcript identifiers per category (significant DTU, -# # no DTU, Not Applicable): -# myids <- get_dtu_ids(mydtu) -# -# # 6. Plot significance VS effect size: -# plot_overview(mydtu) - -## ------------------------------------------------------------------------ -library(rats) - -## ------------------------------------------------------------------------ -# Show the first rows of the table corresponding to one sample, from simulated data. -head(sim_boot_data()[[2]][[1]]) - -## ------------------------------------------------------------------------ -# Show the first rows of the table corresponding to the annotation, from simulated data. -head(sim_count_data()[[1]]) - -## ----eval=FALSE---------------------------------------------------------- -# # Extract transcript ID to gene ID index from a GTF annotation. -# myannot <- annot2ids("my_annotation_file.gtf") - -## ------------------------------------------------------------------------ -# Simulate some data. -simdat <- sim_count_data() - -# For convenience let's assign the contents of the list to separate variables. -mycond_A <- simdat[[2]] # Simulated abundances for one condition. -mycond_B <- simdat[[3]] # Simulated abundances for other condition. -myannot <- simdat[[1]] # Transcript and gene Identifiers for the above data. - -## ------------------------------------------------------------------------ -# Find DTU between the simulated datasets. -mydtu <- call_DTU(annot= myannot, count_data_A= mycond_A, count_data_B= mycond_B, - boots= "none", verbose= FALSE, - name_A= "healthy", name_B= "patients", varname= "My phenotype", - description="Comparison of two simulated counts datasets for the - tutorial. Simulated using built-in functionality of - `rats`.") - -## ------------------------------------------------------------------------ -# Simulate some data. -simdat <- sim_boot_data() - -# For convenience let's assign the contents of the list to separate variables. -mycond_A <- simdat[[2]] # Simulated bootstrapped data for one condition. -mycond_B <- simdat[[3]] # Simulated bootstrapped data for other condition. -myannot <- simdat[[1]] # Transcript and gene Identifiers for the above data. - -## ------------------------------------------------------------------------ -# Find DTU between conditions "controls" and "patients" in the simulated data. -mydtu <- call_DTU(annot= myannot, boot_data_A= mycond_A, boot_data_B= mycond_B, - name_A= "wildtype", name_B= "some mutant", varname = "My phenotype", - verbose= FALSE, description="Comparison of two simulated datasets - of bootstrapped counts for the tutorial. Simulated using built-in - functionality of `rats`.") - -## ------------------------------------------------------------------------ -# Simulate some data. -simdat <- sim_sleuth_data(cnames = c("controls", "patients")) -# controls and patients are arbitrary names to use as conditions. - -# For convenience let's assign the contents of the list to separate variables. -myslo <- simdat$slo # Simulated minimal sleuth object. -myannot <- simdat$annot # Transcript and gene Identifiers for the above data. - -## ------------------------------------------------------------------------ -# Find DTU between conditions "controls" and "patients" in the simulated data. -mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", name_B = "patients", - varname= "condition", verbose= FALSE, - description="Using a simulated sleuth object for the purposes of the tutorial. - Simulated using built-in functionality of `rats`.") - -## ------------------------------------------------------------------------ -# See available variables and values. -print( myslo$sample_to_covariates ) - -## ----eval=FALSE---------------------------------------------------------- -# # Compare samples by a non-default variable. -# mydtu <- call_DTU(annot= myannot, slo= myslo, name_A= "ba", name_B= "bb", varname= "batch") - -## ------------------------------------------------------------------------ -# A really simple tally of the outcome. -print( dtu_summary(mydtu) ) - -## ------------------------------------------------------------------------ -# Gene and transcript IDs corresponding to the tally above. -ids <- get_dtu_ids(mydtu) - -# Contents -print( names(ids) ) - -# DTU positive genes. -print( ids[["dtu-genes"]] ) - -## ------------------------------------------------------------------------ -print( names(mydtu) ) - -## ------------------------------------------------------------------------ -# Parameter list's elements. -print( names(mydtu$Parameters) ) - -## ------------------------------------------------------------------------ -# Genes table's fields. -print( names(mydtu$Genes) ) - -## ------------------------------------------------------------------------ -# Transcripts table's fields. -print( names(mydtu$Transcripts) ) - -## ------------------------------------------------------------------------ -# Let's check the info and settings. -print( mydtu$Parameters ) - -## ------------------------------------------------------------------------ -# Gene-level calls. -print( mydtu$Genes ) - -## ------------------------------------------------------------------------ -# Transcript-level calls. -print( mydtu$Transcripts ) - -## ------------------------------------------------------------------------ -# Proportion changes for all the transcripts of the "MIX6" gene. -plot_gene(mydtu, "MIX6", vals="proportions") - -## ------------------------------------------------------------------------ -# Absolute expression changes for all the transcripts of the "MIX6" gene. -# The ERROR BARS represent 2 standard deviations from the mean count across replicates. -plot_gene(mydtu, "MIX6", vals="counts") - -## ------------------------------------------------------------------------ -# Proportion change VS significance. -plot_overview(mydtu, type="volcano") - -## ------------------------------------------------------------------------ -# Distribution of maximum proportion change. -plot_overview(mydtu, type="maxdprop") - -## ------------------------------------------------------------------------ -library(ggplot2) - -myplot <- plot_overview(mydtu, "volcano") -myplot # display - -# Change scale of y axis to linear. -myplot2 <- myplot + scale_y_continuous(trans = "identity") -myplot2 - -## ----eval=FALSE---------------------------------------------------------- -# # Calling DTU with custom thresholds. -# mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", name_B = "patients", -# p_thresh = 0.01, count_thresh = 10, dprop_thresh = 0.25) - -## ------------------------------------------------------------------------ -# Compare by a different variable. In this case "batch". -mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "ba", name_B = "bb", - varname= "batch", verbose = FALSE) - -## ----eval=FALSE---------------------------------------------------------- -# # Bootstrap both types of DTU calls (default), for 100 iterations (default). -# mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", -# name_B = "patients", boots = "both", bootnum = 100) -# -# # Only bootstrap transcript calls. -# mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", -# name_B = "patients", boots = "transc") -# -# # Only bootstrap gene calls. -# mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", -# name_B = "patients", boots = "genes") -# -# # Skip bootstraps. -# mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", -# name_B = "patients", boots = "none") - -## ----eval=FALSE---------------------------------------------------------- -# mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", -# name_B = "patients", conf_thresh = 0.9) - -## ----eval=FALSE---------------------------------------------------------- -# # Transcript-level confidence threshold VS. number of DTU positive calls. -# plot_overview(mydtu, type="transc_conf") -# -# # Gene-level confidence threshold VS. number of DTU positive calls. -# plot_overview(mydtu, type="gene_conf") - -## ----eval=FALSE---------------------------------------------------------- -# # Transcripts only. -# mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", -# name_B = "patients", testmode="transc") -# # Genes only. -# mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", -# name_B = "patients", testmode="genes") - -## ----eval=FALSE---------------------------------------------------------- -# # Bonferroni correction. -# mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", -# name_B = "patients", correction = "bonferroni") - -## ------------------------------------------------------------------------ -# Lets emulate some input with custom field names. -sim <- sim_sleuth_data(varname="mouse", cnames=c("Splinter", "Mickey"), - COUNTS_COL="the-counts", TARGET_COL="transcript", - PARENT_COL="gene", BS_TARGET_COL = "trscr") -myslo <- sim$slo -myannot <- sim$annot - -print( sim$slo$sample_to_covariates ) -print( head(sim$slo$kal[[1]]$bootstrap[[1]]) ) -print( head(sim$annot) ) - -## ------------------------------------------------------------------------ -# Call DTU on data with custom field names. -mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "Splinter", name_B = "Mickey", - varname="mouse", TARGET_COL="transcript", PARENT_COL="gene", - COUNTS_COL="the-counts", BS_TARGET_COL="trscr", verbose = FALSE) - -## ------------------------------------------------------------------------ -print( names(mydtu$Transcripts) ) - diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd index 2ead84d..bbfda1f 100644 --- a/vignettes/tutorial.Rmd +++ b/vignettes/tutorial.Rmd @@ -591,24 +591,24 @@ Possibly the most common plot in differential expression is the volcano plot, wh the statistical significance. As it is difficult to define a single p-value and a single effect size at the gene level, the volcano can only be plotted at the transcript level. -```{r} +```{r eval=FALSE} # Proportion change VS significance. plot_overview(mydtu, type="volcano") ``` -And this is what it looks like on a larger dataset: +This is what it looks like on a larger dataset: ![Dprop VS sig](./fig/volcano.jpg) The next command plots the largest change in proportion seen within each gene, against the number of genes showing such change. This is a way to inspect what effect sizes are present in the data. As an additional layer of information, they are colour-coded by their DTU call. -```{r} +```{r eval=FALSE} # Distribution of maximum proportion change. plot_overview(mydtu, type="maxdprop") ``` -And this is what it looks like on a larger dataset: +This is what it looks like on a larger dataset: ![Max Dprop](./fig/maxdprop.jpg) @@ -624,8 +624,8 @@ library(ggplot2) myplot <- plot_overview(mydtu, "volcano") myplot # display -# Change scale of y axis to linear. -myplot2 <- myplot + scale_y_continuous(trans = "identity") +# Change title. +myplot2 <- myplot + ggtitle("My epic title") myplot2 ``` @@ -714,8 +714,7 @@ mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", As measure of confidence in the DTU calls, we take the fraction of iterations that returned a positive DTU call for the given gene/transcript. A positive call is considered confident if the fraction of positive calls exceeds the threshold (`>= conf_thresh`). A negative call is considered confident if the fraction of positive calls is below the complement of the threshold (`<= 1 - conf_thresh`). - -The default confidence threshold is somewhat strict, but can be over-ridden. +The default confidence threshold can be over-ridden. ```{r eval=FALSE} mydtu <- call_DTU(annot = myannot, slo = myslo, name_A = "controls", diff --git a/vignettes/tutorial_files/figure-html/unnamed-chunk-27-2.png b/vignettes/tutorial_files/figure-html/unnamed-chunk-27-2.png index 49e3232..5eb3d1d 100644 Binary files a/vignettes/tutorial_files/figure-html/unnamed-chunk-27-2.png and b/vignettes/tutorial_files/figure-html/unnamed-chunk-27-2.png differ