Merge pull request #30 from olivroy/knitr-section

Support knitr notebooks default params (major performance blow?) will…
olivroy · Jun 7, 2024 · 1cad32f · 1cad32f
2 parents 92c7a66 + 41d5487
commit 1cad32f
Show file tree

Hide file tree

Showing 13 changed files with 150 additions and 64 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,5 +3,6 @@
 .Rdata
 .httr-oauth
 .DS_Store
+README.knit.md
 .quarto
 docs
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,9 @@
 # reuseme (development version)
 
+* Outline elements present more than four times in a file will not be printed as they are considered placeholders. (like generic test name)
+
+* `proj_outline()` now detects [knitr notebooks](https://rmarkdown.rstudio.com/articles_report_from_r_script.html) that use the default options. Internally, the file is transformed into a md file by stripping roxygen comments, and is processed as such. (#30)
+
 * `proj_outline()` no longer shows `complete_todo()` links for items in non-interactive sessions. `complete_todo()` links are now only shown when calling `file_outline()` on the active file.
 
 * `proj_list()` / `proj_switch()` no longer opens a nested project if looking for `"pkgdown"`, `"testthat"`, etc.

diff --git a/R/outline-criteria.R b/R/outline-criteria.R
@@ -32,12 +32,28 @@ o_is_roxygen_comment <- function(x, file_ext = NULL) {
   ifelse(rep(is_r_file, length.out = length(x)), stringr::str_starts(x, "#'\\s"), FALSE)
 }
 
-o_is_todo_fixme <- function(x) {
-  has_todo <- stringr::str_detect(x, "(?<!\"#\\s)(TODO[^\\.]\\:?|FIXME\\s|BOOK|(?<!\")WORK[^I``])")
+o_is_notebook <- function(x, file, file_ext, line) {
+  # Like roxy comments and first line = --, 2nd title.
+  # x$is_notebook <- grepl("notebook.*\\.R", x$file)
+  # Detect #' ---
+  any_notebooks <- grep("^#' ---", x[line == 1 & file_ext == "R"], fixed = FALSE)
+  if (length(any_notebooks) > 0L) {
+    is_notebook <- file %in% file[line == 1 & file_ext == "R"][any_notebooks]
+  } else {
+    is_notebook <- FALSE
+  }
+  is_notebook
+}
 
+o_is_todo_fixme <- function(x, is_roxygen_comment = FALSE) {
+  has_todo <- !is_roxygen_comment &
+    grepl("(?<!\"#\\s)(TODO[^\\.]\\:?|FIXME\\s|BOOK|(?<!\")WORK[^I``])", x, perl = TRUE)
   if (!any(has_todo)) {
     return(has_todo)
   }
+  if (length(is_roxygen_comment) == 1) {
+    is_roxygen_comment <- rep(is_roxygen_comment, length.out = length(x))
+  }
   # only check for potential candidates
   p <- which(has_todo)
   candidates <- x[has_todo]
@@ -46,19 +62,19 @@ o_is_todo_fixme <- function(x) {
     !o_is_test_that(candidates) &
     !stringr::str_starts(candidates, "\\s*\"\\s*") &
     !grepl("extract_tag_in_text", candidates, fixed = TRUE) &
-    !o_is_roxygen_comment(candidates) & # don't put these tags in documentation :)
+    !is_roxygen_comment[p] & # don't put these tags in documentation :)
     !stringr::str_detect(candidates, "grepl?\\(|g?sub\\(|str_detect|str_remove|str_extract|use_todo|,\\stodo\\)|TODO\\.R|TODO file|@param") &
     !stringr::str_detect(candidates, "[:upper:]\"|[:upper:]{4,10} item") & # eliminate false positives
     !stringr::str_detect(candidates, "\".{0,100}(TODO|FIXME|WORK)") # remove some true negs for now.
   has_todo
 }
 
-o_is_work_item <- function(x) {
+o_is_work_item <- function(x, is_roxygen_comment = FALSE) {
   res <- stringr::str_detect(x, "(?<!\")# WORK")
   if (!any(res)) {
     return(res)
   }
-  res[which(res)] <- o_is_todo_fixme(x[which(res)])
+  res[which(res)] <- o_is_todo_fixme(x[which(res)], is_roxygen_comment)
   res
 }
 
@@ -92,13 +108,13 @@ o_is_tab_plot_title <- function(x) {
     !stringr::str_detect(x, "expect_error|header\\(\\)|```\\{|guide_")
 }
 
-o_is_section_title <- function(x, roxy_section = FALSE) {
-  is_section_title <- stringr::str_detect(x, "^\\s{0,4}\\#+\\s+(?!\\#)") | roxy_section # remove commented  add roxygen
+o_is_section_title <- function(x, is_roxygen_comment = FALSE, is_todo_fixme = FALSE) {
+  is_section_title <- !is_roxygen_comment & !is_todo_fixme & stringr::str_detect(x, "^\\s{0,4}\\#+\\s+(?!\\#)") & !is_roxygen_comment # remove commented  add roxygen
   if (!any(is_section_title)) {
     return(is_section_title)
   }
-  if (roxy_section) {
-    x <- sub(":$", "", x)
+  if (length(is_roxygen_comment) == 1) {
+    rep(is_roxygen_comment, length.out = length(is_section_title))
   }
   uninteresting_headings <- paste(
     "(Tidy\\s?T(uesday|emplate)|Readme|Wrangle|Devel)$|error=TRUE",
@@ -110,17 +126,11 @@ o_is_section_title <- function(x, roxy_section = FALSE) {
   p_s_title <- which(is_section_title)
   is_section_title[p_s_title] <-
     !grepl(uninteresting_headings, x[p_s_title]) &
-      !o_is_todo_fixme(x[p_s_title]) &
-      !o_is_commented_code(x[p_s_title]) &
       # to exclude md tables from outline
       stringr::str_count(x[p_s_title], "\\|") < 4
   is_section_title
 }
 
-o_is_commented_code <- function(x) {
-  stringr::str_detect(x, "#.+\\(.+\\=.+[\\),\"']$")
-}
-
 o_is_cli_info <- function(x, is_snap_file = FALSE, file = "file") {
   has_cli <- grepl("cli_", x, fixed = TRUE)
 
@@ -150,14 +160,31 @@ define_outline_criteria <- function(.data, print_todo) {
   x$is_md <- x$is_md & !x$is_news # treating news and other md files differently.
   x$is_test_file <- grepl("tests/testthat/test", x$file, fixed = TRUE)
   x$is_snap_file <- grepl("_snaps", x$file, fixed = TRUE)
-
+  x$is_roxygen_comment <- o_is_roxygen_comment(x$content, x$file_ext)
+  if (any(x$is_roxygen_comment)) {
+    # detect knitr notebooks
+    x$is_notebook <- o_is_notebook(x = x$content, x$file, x$file_ext, x$line)
+    x$content[x$is_notebook] <- sub("^#'\\s?", "", x$content[x$is_notebook])
+    x$is_md <- (x$is_md | x$is_roxygen_comment | x$is_notebook) & !x$is_news # treating news and other md files differently.
+    x$is_roxygen_comment <- x$is_roxygen_comment & !x$is_notebook
+    # TODO extract title in roxy comments (@title too.L)
+    # x <- dplyr::mutate(
+    #   x,
+    #   # Remove any files that contain noRd roxy tag to avoid false positive (this limitation can be overcome if / when I use roxygen2 parser)
+    #   is_roxygen_comment = is_roxygen_comment & !any(startsWith(content, "#' @noRd")),
+    #   .by = file
+    # )
+    # x$is_object_title <- x$is_roxygen_comment & (x$line == 1 | dplyr::lag(x$ccontent, "Nothing") %in% c("", " ", "  ") & dplyr::lead(x$ccontent) %in% c("#'", "#'  ", "#'   "))
+  } else {
+    x$is_notebook <- FALSE
+  }
   x <- dplyr::mutate(
     x,
     # Problematic when looking inside functions
     # maybe force no leading space.
     # TODO strip is_cli_info in Package? only valid for EDA
     is_cli_info = o_is_cli_info(content, is_snap_file, file),
-    is_doc_title = stringr::str_detect(content, "(?<![-(#\\s?)_[:alpha:]])title\\:.{4,100}") &
+    is_doc_title = stringr::str_detect(content, "(?<![-(#\\s?)_[:alpha:]'\"])title\\:.{4,100}") &
       !stringr::str_detect(content, "No Description|Ttitle|Subtitle|[Tt]est$|\\\\n") & line < 50 &
       !stringr::str_detect(dplyr::lag(content, default = "nothing to detect"), "```yaml"),
     is_chunk_cap = stringr::str_detect(content, "\\#\\|.*(cap|title):"),
@@ -171,12 +198,12 @@ define_outline_criteria <- function(.data, print_todo) {
     ),
     is_chunk_cap_next = is_chunk_cap,
     is_test_name = is_test_file & o_is_test_that(content) & !o_is_generic_test(content),
-    is_section_title = o_is_section_title(content),
+    is_todo_fixme = print_todo & o_is_todo_fixme(content, is_roxygen_comment) & !is_snap_file,
+    is_section_title = o_is_section_title(content, is_roxygen_comment, is_todo_fixme),
     pkg_version = extract_pkg_version(content, is_news, is_section_title),
     is_section_title_source = is_section_title &
       stringr::str_detect(content, "[-\\=]{3,}|^\\#'") &
       stringr::str_detect(content, "[:alpha:]"),
-    is_todo_fixme = print_todo & o_is_todo_fixme(content) & !o_is_roxygen_comment(content, file_ext) & !is_snap_file,
     n_leading_hash = nchar(stringr::str_extract(content, "\\#+")),
     n_leading_hash = dplyr::coalesce(n_leading_hash, 0),
     # Make sure everything is second level in revdep/.
@@ -185,7 +212,6 @@ define_outline_criteria <- function(.data, print_todo) {
     is_cross_ref = stringr::str_detect(content, "docs_links?\\(") & !stringr::str_detect(content, "@param|\\{\\."),
     is_function_def = grepl("<- function(", content, fixed = TRUE) & !stringr::str_starts(content, "\\s*#"),
     is_tab_or_plot_title = o_is_tab_plot_title(content) & !is_section_title & !is_function_def,
-    is_a_comment_or_code = stringr::str_detect(content, "!=|\\|\\>|\\(\\.*\\)"),
   )
   x <- dplyr::mutate(
     x,

diff --git a/R/outline.R b/R/outline.R
@@ -381,6 +381,16 @@ print.outline_report <- function(x, ...) {
   recent_only <- x$recent_only[1]
   file_sections$link_rs_api <- stringr::str_replace_all(file_sections$link_rs_api, custom_styling)
 
+  if (anyDuplicated(stats::na.omit(file_sections$outline_el)) > 0L) {
+    # Remove all things that appear more than 4 times in a file.
+    # this typically indicates a placeholder
+    file_sections <- dplyr::filter(
+      file_sections,
+      dplyr::n() < 4,
+      .by = c(.data$file, .data$outline_el)
+    )
+  }
+
   summary_links_files <- file_sections |>
     dplyr::filter(!is_function_def) |>
     dplyr::summarise(
@@ -488,8 +498,8 @@ keep_outline_element <- function(.data) {
   dat <- dplyr::filter(
     .data,
     (is_news & (
-      (!simplify_news & is_section_title & !is_a_comment_or_code & before_and_after_empty) |
-        (simplify_news & is_section_title & !pkg_version %in% versions_to_drop & !is_second_level_heading_or_more & !is_a_comment_or_code & before_and_after_empty)
+      (!simplify_news & is_section_title & before_and_after_empty) |
+        (simplify_news & is_section_title & !pkg_version %in% versions_to_drop & !is_second_level_heading_or_more & before_and_after_empty)
     )) |
       # still regular comments in .md files
       # what to keep in .md docs
@@ -709,6 +719,7 @@ construct_outline_link <- function(.data, is_saved_doc, is_active_doc, dir_commo
     style_fun = NULL,
     is_saved_doc = NULL,
     is_roxygen_comment = NULL,
+    is_notebook = NULL,
     complete_todo_link = NULL,
     is_news = NULL,
     # I may put it back ...

diff --git a/README.md b/README.md
@@ -167,7 +167,7 @@ bench::mark(
 #> # A tibble: 1 × 6
 #>   expression                     min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>                <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 outline <- proj_outline()    495ms    504ms      1.98    21.1MB     4.96
+#> 1 outline <- proj_outline()    685ms    685ms      1.46    22.2MB     2.92
 ```
 
 <details>
@@ -218,6 +218,7 @@ outline
 #> 
 #> ── `R/outline-criteria.R`
 #> `i` Add variable to outline data frame
+#> `i` TODO extract title in roxy comments (@title too.L)
 #> `i` TODO strip is_cli_info in Package? only valid for EDA
 #> `i` FIXME try to detect all the chunk caption, but would have to figure out the end of it maybe lightparser.
 #> `i` it is 'R/outline.R'
@@ -254,6 +255,9 @@ outline
 #> 
 #> ── `R/utils.R`  OS utils
 #> 
+#> ── `tests/testthat/_outline/knitr-notebook.R`  Crop Analysis Q3 2013
+#> `i` A great section
+#> 
 #> ── `tests/testthat/_outline/my-analysis.md`  My doc title
 #> `i` A section
 #> `i` Dashboard card
@@ -312,6 +316,7 @@ outline
 #> `i` alpha and work_only arguments work
 #> `i` file_outline() is a data frame
 #> `i` pattern works as expected
+#> `i` file_outline() detects correctly knitr notebooks
 #> 
 #> ── `tests/testthat/_snaps/proj-list.md`
 #> `i` proj_file() works

diff --git a/reuseme.Rproj b/reuseme.Rproj
@@ -1,28 +1,28 @@
-Version: 1.0
-
-RestoreWorkspace: No
-SaveWorkspace: No
-AlwaysSaveHistory: Default
-
-EnableCodeIndexing: Yes
-UseSpacesForTab: Yes
-NumSpacesForTab: 2
-Encoding: UTF-8
-
-RnwWeave: Sweave
-LaTeX: pdfLaTeX
-
-AutoAppendNewline: Yes
-StripTrailingWhitespace: Yes
-LineEndingConversion: Posix
-
-BuildType: Package
-PackageUseDevtools: Yes
-PackageInstallArgs: --no-multiarch --with-keep.source
-PackageRoxygenize: rd,collate,namespace
-
-UseNativePipeOperator: Yes
-
-MarkdownWrap: Sentence
-
-SpellingDictionary: en_CA
+Version: 1.0
+
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+LineEndingConversion: Posix
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace
+
+UseNativePipeOperator: Yes
+
+MarkdownWrap: Sentence
+
+SpellingDictionary: en_CA
diff --git a/tests/testthat/_outline/knitr-notebook.R b/tests/testthat/_outline/knitr-notebook.R
@@ -0,0 +1,18 @@
+#' ---
+#' title: "Crop Analysis Q3 2013"
+#' author: "John Smith"
+#' date: "May 3rd, 2014"
+#' ---
+#'
+
+#+ fig.width=5, fig.height=5
+plot(cars)
+
+#' # A great section
+#'
+#' Text with [markdown](https://markdown.org)
+#'
+#' Learn how to [Compile Report from R](https://rmarkdown.rstudio.com/articles_report_from_r_script.html)
+#'
+
+plot(mtcars)
diff --git a/tests/testthat/_outline/my-analysis.md b/tests/testthat/_outline/my-analysis.md
@@ -4,6 +4,12 @@ title: My doc title
 
 # A section
 
+```yaml
+
+    'title: "Tutorial"',
+    "title: tutorial-test-1",
+```
+
 ```{r}
 #| title: Dashboard card
 

diff --git a/tests/testthat/_outline/titles.md b/tests/testthat/_outline/titles.md
@@ -7,6 +7,6 @@
 
 output <!--# TODO this is an item --> not a todo
 
-# Last title
+# Last title {.unnumbered}
 
 ## `function_name()` title
diff --git a/tests/testthat/_snaps/outline-criteria.md b/tests/testthat/_snaps/outline-criteria.md
@@ -4,8 +4,8 @@
       cat(outline_crit, sep = "\n")
     Output
       o_is_cli_info
-      o_is_commented_code
       o_is_generic_test
+      o_is_notebook
       o_is_roxygen_comment
       o_is_section_title
       o_is_tab_plot_title

diff --git a/tests/testthat/_snaps/outline.md b/tests/testthat/_snaps/outline.md
@@ -86,3 +86,13 @@
       
       -- `outline-script.R`  Example for `file_outline()` 
 
+# file_outline() detects correctly knitr notebooks
+
+    Code
+      file_outline(path = test_path("_outline", "knitr-notebook.R"))
+    Message
+      
+      -- `knitr-notebook.R`  Crop Analysis Q3 2013 
+    Output
+      `i` A great section
+
diff --git a/tests/testthat/test-outline-criteria.R b/tests/testthat/test-outline-criteria.R
@@ -1,4 +1,10 @@
 # Test individual outline elements ------
+test_that("o_is_notebook() works", {
+  expect_true(o_is_notebook("#' ---", "file.R", file_ext = "R", line = 1))
+  expect_false(o_is_notebook("#' ---", "file.R", file_ext = "qmd", line = 1))
+  expect_false(o_is_notebook("#' Fn title", "file.R", file_ext = "qmd", line = 1))
+})
+
 test_that("o_is_roxygen_comment() works", {
   expect_true(o_is_roxygen_comment("#' @param"))
   expect_equal(
@@ -18,6 +24,8 @@ test_that("o_is_todo_fixme() works", {
   expect_false(o_is_todo_fixme("  expect_true(o_is_todo_fixme(\"  # TODO this is important\"))"))
   # avoid finding comments.
   expect_false(o_is_todo_fixme("# another TODO item"))
+  expect_false(o_is_todo_fixme("#' TODO, WORK, FIXME)", is_roxygen_comment = T))
+
 })
 
 test_that("o_is_work_item() works", {
@@ -50,17 +58,8 @@ test_that("o_is_section_title() works", {
   expect_true(o_is_section_title("# Analysis of this"))
   expect_true(o_is_section_title("  # section 1 ----"))
   expect_false(o_is_section_title("# TidyTuesday"))
-  expect_false(o_is_section_title("Function ID:", roxy_section = TRUE))
-  expect_false(o_is_section_title("#' @section Function ID:", roxy_section = TRUE))
-})
-
-test_that("o_is_commented_code() works", {
-  expect_true(o_is_commented_code("# DiagrammeR(x = 1,"))
-  expect_true(o_is_commented_code("# DiagrammeR(x = 1)"))
-  expect_true(o_is_commented_code("#' # DiagrammeR(x = 1)"))
-  expect_true(o_is_commented_code("# DiagrammeR(x = 1\""))
-
-  expect_false(o_is_commented_code("# A new section {.unnumbered}"))
+  # not considering roxygen sections anymore.
+  expect_false(o_is_section_title("#' # A very interesting section"))
 })
 
 test_that("o_is_cli_info() works", {