diff --git a/DESCRIPTION b/DESCRIPTION index 9cd1826..edc3f7b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: metapr2 Title: Eukaryotic 18S rRNA metabarcode database -Version: 1.0.2 +Version: 1.0.3 Authors@R: person(given = "Daniel", family = "Vaulot", @@ -37,6 +37,7 @@ Imports: scrypt, shiny, shinycssloaders, + shinylogs, shinymanager, shinyvalidate, shinyWidgets, diff --git a/Dockerfile b/Dockerfile index 6c5917f..ef9372b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -57,7 +57,7 @@ RUN install2.r --error --skipinstalled \ ggforce yaml RUN install2.r --error --skipinstalled \ - purrr bslib + purrr bslib shinylogs # Install vsearch # https://github.com/FredHutch/docker-vsearch/blob/master/Dockerfile diff --git a/NEWS.md b/NEWS.md index b062c3f..b2eacf3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,28 +1,67 @@ +# metapr2 1.0.3 + +Released: 2022-04-30 + +### Database + +#### version 1.1 - 41 datasets +* Tara Ocean V9 samples have been reprocessed using the dada2 pipeline. In version 1.0, the original swarms were used instead of ASVs. + +### Tabs of application + +#### Datasets +* Selected datasets appear first ordered by dataset_id +* Search error fixed + + + +#### Query +* A fasta formatted sequence with header can now be used. + +#### Download +* The zipped file now contains a fasta file with the asv_code and the taxonomy in the header. 
+ +--- + # metapr2 1.0.2 Released: 2021-12-14 -### Datasets +### Tabs of application + +#### Datasets * Settings (datasets, type of samples) can be saved and recalled -### Taxonomy +#### Taxonomy * Now more than one taxon can be selected * Three divisions can be removed (Fungi, Metazoa and Streptophyta) * Taxa (selected and excluded) can be saved and recalled * It is necessary to press the "Validate taxa" to replot after changing taxo selection -### Treemaps +#### Treemaps * Color of taxa now match other panels * Add a treemap of ASVs number -### Maps +#### Maps * Add topography * Add equator, tropics and polar circle -### Barplots +#### Barplots * Add number of samples for each bar -### Diversity Alpha +#### Diversity Alpha * Use Violin + Sina plot for discrete variable * Allow discretization of continuous variables (e.g. depth, latitude) @@ -32,10 +71,12 @@ Released: 2021-12-14 Released: 2021-11-22 -### Documentation +### Tabs of application + +#### Documentation * Using pkgdown: https://pr2database.github.io/metapr2-shiny/ -### Barplots +#### Barplots * Make interactive (R plotly library) * Add coloring by ecological function * Add time series @@ -47,3 +88,8 @@ Released: 2021-11-22 Released: 2021-11-19 * Initial release + +### Database + +#### version 1.0 - 41 datasets +* Tara Ocean V9 samples have not been reprocessed and the original swarms are used instead. 
diff --git a/R/app.R b/R/app.R index f59b8e8..d69688b 100644 --- a/R/app.R +++ b/R/app.R @@ -39,6 +39,7 @@ shinymanager::set_labels( "Login" = "Enter metaPR2" ) + # User interface ---------------------------------------------------------- ui <- fluidPage( @@ -46,6 +47,9 @@ ui <- fluidPage( # Booststrap theme:https://rstudio.github.io/shinythemes/ # theme = bslib::bs_theme(bootswatch = "yeti"), + # Tracking not necessary in ui + # shinylogs::use_tracking(), + # Script to close the windows after some inactivity - ACTIVATE for web application tags$script(inactivity), @@ -100,6 +104,9 @@ server <- function(input, output, session) { # Stop the application of the session is closed (after 30 min) - ACTIVATE for web application session$onSessionEnded(stopApp) + # To track usage + shinylogs::track_usage(storage_mode = shinylogs::store_sqlite(path = "logs/")) + # Authentification authentification <- callModule(module = shinymanager::auth_server, diff --git a/R/fct_sequences.R b/R/fct_sequences.R index 6fe0131..4a99299 100644 --- a/R/fct_sequences.R +++ b/R/fct_sequences.R @@ -1,3 +1,16 @@ +# ========================================================================= +# --- Clean sequence (upper case, remove fasta header and line breaks) -------- +# ========================================================================= + + +sequence_clean <- function(sequence){ + sequence <- str_to_upper(sequence) + sequence <- str_replace_all(sequence, "^>.*" , "") # Remove fasta header in case it is present + sequence <- str_replace_all(sequence, "[\\r\\n]" , "") +} + + + # ========================================================================= # --- Check that sequence is valid -------------------------------------------- @@ -5,8 +18,7 @@ sequence_check <- function(sequence){ - sequence <- str_to_upper(sequence) - sequence <- str_replace_all(sequence, "[\r\n]" , "") + sequence <- sequence_clean(sequence) ((nchar(sequence) >= 130) & (str_detect(sequence, "[^ACGTRYSWKMBDHVN]", negate = 
TRUE))) } @@ -19,8 +31,7 @@ sequence_check <- function(sequence){ match_asv <- function(fasta.df, query){ - query <- str_to_upper(query) - query <- str_replace_all(query, "[\r\n]" , "") + query <- sequence_clean(query) query <- Biostrings::DNAString(query) @@ -51,6 +62,7 @@ blaster_asv <- function(fasta.df, query, minIdentity = 0.80, maxAccepts = 100){ + query <- sequence_clean(query) query <- data.frame(Id = "query", Seq =query) db <- fasta.df %>% @@ -84,5 +96,59 @@ blaster_asv <- function(fasta.df, query, return(df) } +# ========================================================================= +# --- Write fasta file with taxo ------------------------------------------ +# ========================================================================= + +#' @title Write a fasta file with the taxonomy +#' +#' @description +#' Write a fasta file from a set of sequences +#' Option : add to the definition line the taxonomy separated by separator character (e.g. |) +#' +#' >Otu0001|Alveolata|Dinophyta|Syndiniales|Dino-Group-I|Dino-Group-I-Clade-1|Dino-Group-I-Clade-1_X|Dino-Group-I-Clade-1_X_sp. +#' +#' AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGA... +#' @param df The data frame with the otu names, the taxonomy and the sequences. It should have the following columns (with exactly these names) +#' +#' * seq_name : the sequence name +#' * supergroup, division, class, order, family, genus, species : the taxonomic levels (used when taxo_include is TRUE) +#' * sequence +#' @param file_name Character, where to save the fasta file +#' @param compress If TRUE produces a gz file +#' @param taxo_include If TRUE then add taxo information which must be provided +#' @param taxo_separator Character used to separate the different taxonomic levels +#' @return TRUE if it terminates OK +#' +#' @examples +#' fasta_write(df,"otu_taxo.fasta", compress=FALSE, taxo_include=TRUE, taxo_separator=";") +#' @md +#' @export + +fasta_write <- function(df,file_name, compress=FALSE, taxo_include=TRUE, taxo_separator="|") { + + # First remove the gaps (can be - or .) 
+ df <- df %>% mutate(sequence = str_replace_all(sequence, "(-|\\.)","")) + + seq_out <- Biostrings::DNAStringSet(df$sequence) + + if (taxo_include==TRUE) { + names(seq_out) <- str_c(df$seq_name, + df$supergroup, + df$division, + df$class, + df$order, + df$family, + df$genus, + df$species, + sep=taxo_separator) + } + else { names(seq_out) <- df$seq_name + } + + Biostrings::writeXStringSet(seq_out, file_name, compress=compress, width = 20000) + + return(TRUE) +} diff --git a/R/module_datasets.R b/R/module_datasets.R index 6a68117..189e663 100644 --- a/R/module_datasets.R +++ b/R/module_datasets.R @@ -139,8 +139,9 @@ dataServer <- function(id, taxo, authentification) { req(asv_set()) DT::datatable(asv_set()$datasets %>% select(dataset_id, dataset_name, region, paper_reference, sequencing_technology, sample_number, asv_number, n_reads_mean) %>% - mutate(selected = ifelse(dataset_id %in% input$datasets_selected_id,TRUE, FALSE)) %>% - arrange(dataset_name) , + mutate(selected = ifelse(dataset_id %in% input$datasets_selected_id,TRUE, FALSE)) %>% + mutate(paper_reference = iconv(paper_reference, "latin1", to = "UTF-8")) %>% + arrange(-selected, dataset_name) , rownames = FALSE , options = list( autoWidth = FALSE, @@ -288,6 +289,9 @@ dataServer <- function(id, taxo, authentification) { if (authentification$user == "private") { dir_asv_set <- "data-qs-private" } + if (authentification$user == "ge") { + dir_asv_set <- "data-qs-ge" + } message("User: ", authentification$user) diff --git a/R/module_download.R b/R/module_download.R index 32bc8d5..3efe6b7 100644 --- a/R/module_download.R +++ b/R/module_download.R @@ -70,12 +70,18 @@ downloadServer <- function(id, datasets_selected, samples_selected, df_selected, file_datasets <- str_c(tmpdir, "/datasets.xlsx") file_samples <- str_c(tmpdir, "/samples.xlsx") file_asv <- str_c(tmpdir, "/asv.xlsx") + file_asv_fasta <- str_c(tmpdir, "/asv.fasta") + # file_asv_reads <- str_c(tmpdir, "/asv_reads.xlsx") - files = c(file_datasets, 
file_samples, file_asv) + files = c(file_datasets, file_samples, file_asv, file_asv_fasta) rio::export(datasets_selected(), file=file_datasets, overwrite = TRUE) rio::export(samples_selected(), file=file_samples, overwrite = TRUE) rio::export(fasta_selected(), file=file_asv, overwrite = TRUE) + # Export fasta file + fasta_selected() %>% + rename(seq_name = asv_code) %>% + fasta_write(file_asv_fasta) # rio::export(df_selected(), file=file_asv_reads, overwrite = TRUE) system2("zip", args=(paste("--junk-paths", path, files,sep=" "))) # remove the paths of the files diff --git a/R/module_query.R b/R/module_query.R index d9841db..1629ad9 100644 --- a/R/module_query.R +++ b/R/module_query.R @@ -75,7 +75,7 @@ queryServer <- function(id, samples_selected, df_all, fasta_all) { p(), sliderInput(ns("pct_id_min"), label ="% identity min", min = 80.0, max = 100.0, - step = 0.2, value = 100, width = "500px"), + step = 0.2, value = 95, width = "500px"), textAreaInput(ns("query"), label = "Query - at least 130 bp", value = "", width = "100%", height = "100px", diff --git a/README.Rmd b/README.Rmd index 297acea..3abed9f 100644 --- a/README.Rmd +++ b/README.Rmd @@ -25,9 +25,9 @@ knitr::opts_chunk$set( ## A database of 18S rRNA metabarcodes -**Database version**: 1.0.0 - 41 datasets +**Database version**: 1.1 - 41 datasets -**Shiny application version**: 1.0.2 +**Shiny application version**: 1.0.3 ### Presentation diff --git a/README.md b/README.md index d76dea5..0d957cd 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,9 @@ ## A database of 18S rRNA metabarcodes -**Database version**: 1.0.0 - 41 datasets +**Database version**: 1.1 - 41 datasets -**Shiny application version**: 1.0.2 +**Shiny application version**: 1.0.3 ### Presentation diff --git a/docs/articles/index.html b/docs/articles/index.html index 35747ff..3c69b0d 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -10,7 +10,7 @@ metapr2 - 1.0.2 + 1.0.3