olivroy · olivroy · May 17, 2024 · May 17, 2024 · May 17, 2024 · May 17, 2024
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
@@ -47,7 +47,7 @@ See our guide on [how to create a great issue](https://code-review.tidyverse.org
 * `define_outline_criteria()` if an item shows as outline, but seems like a false positive, 
 
 
-* `keep_outline_element()`: if an element is **missing** from outline.
+* `keep_outline_element()`: if an element is **missing** from the outline. You can add the keyword "REQUIRED ELEMENT" to get an object for debugging.
 
 * `define_important_element()` if an element is important [^1] 
 

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -35,9 +35,12 @@ Suggests:
     curl,
     gert,
     gt,
+    lightparser,
     magick,
     pillar,
+    roxygen2,
     testthat (>= 3.2.1),
+    tidyr,
     withr
 Config/testthat/edition: 3
 Encoding: UTF-8

diff --git a/NEWS.md b/NEWS.md
@@ -36,6 +36,12 @@ that will passed on to `proj_list()`
 
 * `proj_list()` / `proj_switch()` no longer opens a nested project if looking for `"pkgdown"`, `"testthat"`, etc.
 
+* `proj_outline()` was improved to work with roxygen2 and lightparser to parse file contents more consistently. This means a slowdown, but the increased accuracy is worth it! Parsing a single file should still be pretty fast!
+
+* `proj_outline()` gains `exclude_tests` to exclude tests from the outline.
+
+* `proj_outline()` now detects legacy `fig.cap` in the chunk header. See `knitr::convert_chunk_headers()` for the newer approach. 
+
 * `active_rs_doc_nav()` is a new function to navigate to files pane location.
 
 `active_rs_doc_copy()` now accepts copying md and qmd files too and no longer allows renaming Rprofile.

diff --git a/R/outline-criteria.R b/R/outline-criteria.R
@@ -15,21 +15,26 @@
 #' * is test title
 #' * is a todo item
 #' * is_roxygen_line
-#' * is_tab_title
+#' * is_tab_plot_title
 #'
 #' @noRd
-o_is_roxygen_comment <- function(x, file_ext = NULL) {
+
+o_is_roxygen_comment <- function(x, file_ext = NULL, is_notebook = FALSE) {
   if (!is.null(file_ext)) {
-    is_r_file <- tolower(file_ext) == "r"
+    is_r_file <- tolower(file_ext) == "r" & !is_notebook
   } else {
-    is_r_file <- TRUE
+    is_r_file <- !is_notebook
   }
 
   if (!any(is_r_file)) {
     return(FALSE)
   }
 
-  ifelse(rep(is_r_file, length.out = length(x)), stringr::str_starts(x, "#'\\s"), FALSE)
+  ifelse(
+    rep(is_r_file, length.out = length(x)),
+    grepl("^#'\\s|^#'$", x), # detect roxygen comments in R files
+    FALSE # not a roxy comment in Rmd files, fusen is an exception?
+  )
 }
 
 o_is_notebook <- function(x, file, file_ext, line) {
@@ -106,14 +111,23 @@
     !stringr::str_detect(x, "expect_error|header\\(\\)|```\\{|guide_")
 }
 
-o_is_section_title <- function(x, is_roxygen_comment = FALSE, is_todo_fixme = FALSE) {
-  is_section_title <- !is_roxygen_comment & !is_todo_fixme & stringr::str_detect(x, "^\\s{0,4}\\#+\\s+(?!\\#)") & !is_roxygen_comment # remove commented  add roxygen
+o_is_section_title <- function(x, is_roxygen_comment = FALSE, is_todo_fixme = FALSE, roxy_section = FALSE) {
+  is_section_title <- roxy_section |
+    (!is_roxygen_comment & !is_todo_fixme & stringr::str_detect(x, "^\\s{0,4}\\#+\\s+(?!\\#)") & !is_roxygen_comment) # remove commented  add roxygen
   if (!any(is_section_title)) {
     return(is_section_title)
   }
   if (length(is_roxygen_comment) == 1) {
     rep(is_roxygen_comment, length.out = length(is_section_title))
   }
+  if (length(roxy_section) == 1) {
+    rep(roxy_section, length.out = length(is_section_title))
+  }
+  if (any(roxy_section)) {
+    x[roxy_section] <- sub("@section", "", x, fixed = TRUE)
+    x[roxy_section] <- sub(":$", "", x, fixed = F)
+
+  }
   uninteresting_headings <- paste(
     "(Tidy\\s?T(uesday|emplate)|Readme|Wrangle|Devel)$|error=TRUE",
     "url\\{|Error before installation|unreleased|Function ID$|Function Introduced",
@@ -150,14 +164,19 @@
 
 # Add variable to outline data frame --------------------
 
-define_outline_criteria <- function(.data, print_todo) {
+define_outline_criteria <- function(.data, exclude_todos) {
+  dir_common <- get_dir_common_outline(.data$file)
   x <- .data
   x$file_ext <- s_file_ext(x$file)
   x$is_md <- x$file_ext %in% c("qmd", "md", "Rmd", "Rmarkdown")
   x$is_news <- x$is_md & grepl("NEWS.md", x$file, fixed = TRUE)
-  x$is_md <- x$is_md & !x$is_news # treating news and other md files differently.
   x$is_test_file <- grepl("tests/testthat/test", x$file, fixed = TRUE)
+  x$is_notebook <- o_is_notebook(x = x$content, x$file, x$file_ext, x$line)
+  x$is_roxygen_comment <- o_is_roxygen_comment(x$content, x$file_ext, x$is_notebook)
+  x$content[x$is_notebook] <- sub("^#'\\s?", "", x$content[x$is_notebook])
+  x$is_md <- (x$is_md | x$is_roxygen_comment | x$is_notebook) & !x$is_news # treating news and other md files differently.
   x$is_snap_file <- grepl("_snaps", x$file, fixed = TRUE)
+
   x$is_roxygen_comment <- o_is_roxygen_comment(x$content, x$file_ext)
   if (any(x$is_roxygen_comment)) {
     # detect knitr notebooks
@@ -176,37 +195,75 @@
   } else {
     x$is_notebook <- FALSE
   }
+
+  should_parse_roxy_comments <-
+    !isFALSE(getOption("reuseme.roxy_parse", default = TRUE)) && # will not parse if option is set to FALSE
+    any(x$is_roxygen_comment)
+  if (should_parse_roxy_comments) {
+    # doing this created problems in tests?
+    if (interactive() && !is.null(dir_common) && is_rstudio()) {
+      # The idea is that roxygen2 may be better at getting objects if directory is changed.
+      # but don't bother doing this outside RStudio for now...
+      withr::local_dir(dir_common)
+      if (!fs::file_exists(x$file[1])) {
+        cli::cli_abort("Wrong dir done. file = {.file {x$file[1]}. dir = {.path {dir_common}}", .internal = TRUE)
+      }
+    }
+    rlang::check_installed(c("roxygen2", "tidyr"), "to create roxygen2 comments outline.")
+    files_with_roxy_comments <- unique(x[x$is_roxygen_comment, "file", drop = TRUE])
+    files_with_roxy_comments <- rlang::set_names(files_with_roxy_comments, files_with_roxy_comments)
+    # roxygen2 messages
+    # TRICK purrr::safely() creates an error object, while purrr::possibly() is better.
+    # Suppress roxygen2 messages, suppress callr output, suppress asciicast warnings.
+    invisible(
+      utils::capture.output(
+        parsed_files <- purrr::map(
+          files_with_roxy_comments,
+          purrr::possibly(\(x) roxygen2::parse_file(x, env = NULL))))
+    ) |>
+      suppressMessages() |>
+      suppressWarnings()
+    # if roxygen2 cannot parse a file, let's just forget about it.
+    unparsed_files <- files_with_roxy_comments[which(is.null(parsed_files))]
+    # browser()
+    if (length(unparsed_files) > 0) {
+      cli::cli_inform("Could not parse roxygen comments in {.file {unparsed_files}}")
+    }
+    parsed_files <- purrr::compact(parsed_files)
+    processed_roxy <- join_roxy_fun(parsed_files)
+    outline_roxy <- define_outline_criteria_roxy(processed_roxy)
+  } else {
+    outline_roxy <- NULL
+  }
+
   x <- dplyr::mutate(
-    x,
+    x |> dplyr::filter(!is_roxygen_comment),
     # Problematic when looking inside functions
     # maybe force no leading space.
-    # TODO strip is_cli_info in Package? only valid for EDA
+    # TODO strip is_cli_info in Package? only valid for EDA (currently not showcased..)
     is_cli_info = o_is_cli_info(content, is_snap_file, file),
+    # TODO long enough to be meaningful?
+    # doc title cannot be after line 50 of a document.
     is_doc_title = stringr::str_detect(content, "(?<![-(#\\s?)_[:alpha:]'\"])title\\:.{4,100}") &
       !stringr::str_detect(content, "No Description|Ttitle|Subtitle|[Tt]est$|\\\\n") & line < 50 &
       !stringr::str_detect(dplyr::lag(content, default = "nothing to detect"), "```yaml"),
-    is_chunk_cap = stringr::str_detect(content, "\\#\\|.*(cap|title):"),
-    # deal with chunk cap
-    # FIXME try to detect all the chunk caption, but would have to figure out the end of it maybe {.pkg lightparser}.
-    is_chunk_cap_next = is_chunk_cap & stringr::str_detect(content, "\\s*[\\>\\|]$"),
-    is_chunk_cap = dplyr::case_when(
-      is_chunk_cap & is_chunk_cap_next ~ FALSE,
-      dplyr::lag(is_chunk_cap_next, default = FALSE) ~ TRUE,
-      .default = is_chunk_cap
-    ),
-    is_chunk_cap_next = is_chunk_cap,
+    is_obj_caption = stringr::str_detect(content, "\\#\\|\\s{1,2}[:alpha:]{0,5}[\\-\\.]?(cap|title)[:(\\s*=)]|```\\{r.*cap\\s?\\="),
     is_test_name = is_test_file & o_is_test_name(content) & !o_is_generic_test(content),
-    is_todo_fixme = print_todo & o_is_todo_fixme(content, is_roxygen_comment) & !is_snap_file,
+    is_todo_fixme = !exclude_todos & o_is_todo_fixme(content) & !o_is_roxygen_comment(content, file_ext, is_notebook) & !is_snap_file,
     is_section_title = o_is_section_title(content, is_roxygen_comment, is_todo_fixme),
     pkg_version = extract_pkg_version(content, is_news, is_section_title),
     is_section_title_source = is_section_title &
       stringr::str_detect(content, "[-\\=]{3,}|^\\#'") &
       stringr::str_detect(content, "[:alpha:]"),
-    n_leading_hash = nchar(stringr::str_extract(content, "\\#+")),
+    n_leading_hash = nchar(stringr::str_extract(content, "\\#+(?!\\|)")), # don't count hashpipe
     n_leading_hash = dplyr::coalesce(n_leading_hash, 0),
     # Make sure everything is second level in revdep/.
     n_leading_hash = n_leading_hash + grepl("revdep/", file, fixed = TRUE),
     is_second_level_heading_or_more = (is_section_title_source | is_section_title) & n_leading_hash > 1,
+    # roxygen2 title block
+    is_object_title = FALSE,
+    tag = NA_character_,
+    topic = NA_character_,
     is_cross_ref = stringr::str_detect(content, "docs_(links|add.+)?\\(.") & !stringr::str_detect(content, "@param|\\{\\."),
     is_function_def = grepl("<- function(", content, fixed = TRUE) & !stringr::str_starts(content, "\\s*#"),
     is_tab_or_plot_title = o_is_tab_plot_title(content) & !is_section_title & !is_function_def,
@@ -217,7 +274,73 @@
       line == 1 | !nzchar(dplyr::lead(content, default = "")) & !nzchar(dplyr::lag(content)),
     .by = "file"
   )
+  # browser()
+  res <- dplyr::bind_rows(x, outline_roxy)
+  res <- dplyr::filter(
+    res,
+    content != "NULL"
+  )
+  res <- dplyr::arrange(res, .data$file, .data$line)
+  #res$is_object_title[res$is_doc_title] <- FALSE
+  res
+}
+
+
+define_outline_criteria_roxy <- function(x) {
+  # TODO merge with define_outline_criteria
+  if (rlang::is_atomic(x)) {
+    # in tests, not interactively, got something bizarre
+    cli::cli_warn("x is {.obj_type_friendly {x}}.")
+    if (length(x) == 0) {
+      return(NULL)
+    }
+  }
+  x$is_md <- x$tag %in% c("subsection", "details", "description", "section")
+  # short topics are likely placeholders.
+  x$is_object_title <- x$tag == "title" & nchar(x$content) > 4
+  x$line <- as.integer(x$line)
+  x$file_ext <- "R"
+  # x$content <- paste0("#' ", x$content) # maybe not?
+  x$is_news <- FALSE
+  x$is_roxygen_comment <- TRUE
+  x$is_test_file <- FALSE
+  x$is_snap_file <- FALSE
+  x$before_and_after_empty <- TRUE
+  x$is_section_title <-
+    (x$tag %in% c("section", "subsection") & o_is_section_title(x$content, roxy_section = TRUE)) |
+    (x$tag %in% c("details", "description") & stringr::str_detect(x$content, "#\\s"))
+  x$is_section_title_source <- x$is_section_title
+  x$is_obj_caption <- FALSE
+  x$is_test_name <- FALSE
+  x$pkg_version <- NA_character_
+  # a family or concept can be seen as a plot subtitle?
+  x$is_tab_or_plot_title <- x$tag %in% c("family", "concept")
+  x$is_cli_info <- FALSE
+  x$is_cross_ref <- FALSE
+  x$is_function_def <- FALSE
+  x$is_todo_fixme <- FALSE
+  x$is_notebook <- FALSE
+  x$is_doc_title <- FALSE
+  #x$is_doc_title <- x$line == 1 & x$tag == "title"
+  x$n_leading_hash <- nchar(stringr::str_extract(x$content, "\\#+"))
+  x$n_leading_hash <- dplyr::case_when(
+    x$n_leading_hash > 0 ~ x$n_leading_hash,
+    # give second importance to doc sections..
+    x$tag == "section" & x$is_section_title_source ~ 2,
+    x$tag == "subsection" & x$is_section_title_source ~ 3,
+    .default = 0
+  )
+  x$content <- dplyr::case_when(
+    !x$is_section_title ~ x$content,
+    # : removed from section tag in join_roxy_fun()
+    # code section may not be that interesting..
+    x$tag == "section" ~ paste0("## ", x$content),
+    x$tag == "subsection" ~ paste0("### ", x$content),
+    .default = x$content
+  )
+  x$is_second_level_heading_or_more <- ((x$is_section_title_source | x$is_section_title) & x$n_leading_hash > 1)
+  # x$has_inline_markup <- FALSE # let's not mess with inline markup
   x
 }
 
-# it is {.file R/outline.R} ------
+# it is {.file R/outline.R} or {.file R/outline-roxy.R} ------