From 0bc874cc728c0c650acbf3932d793dae60c7537e Mon Sep 17 00:00:00 2001
From: Michael Chirico
Date: Mon, 5 Aug 2024 14:04:07 -0700
Subject: [PATCH] Add a NEWS linter for checking GitHub link consistency (#6343)

* New md linter for GitHub link consistency

* copy change over

* fixed bad refactor

Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>

* fix cont'd

Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>

* helpful comment

* better programming -> obviate need for names(); better naming

---------

Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>
---
 .ci/linters/md/news_linter.R | 27 +++++++++++++++++++++++++++
 NEWS.md                      |  2 +-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/.ci/linters/md/news_linter.R b/.ci/linters/md/news_linter.R
index 9209d94a8..5eeb302a3 100644
--- a/.ci/linters/md/news_linter.R
+++ b/.ci/linters/md/news_linter.R
@@ -25,10 +25,37 @@ check_section_numbering = function(news) {
   return(any_mismatch)
 }
 
+# ensure that GitHub link text & URL actually agree
+check_gh_links = function(news) {
+  gh_links_info = gregexpr(
+    "\\[#(?<md_number>[0-9]+)\\]\\(https://github.com/Rdatatable/data.table/(?<link_type>[^/]+)/(?<link_number>[0-9]+)\\)",
+    news,
+    perl=TRUE # required for within-group indices
+  )
+  gh_link_metadata = do.call(rbind, lapply(seq_along(gh_links_info), function(idx) {
+    x = gh_links_info[[idx]]
+    if (x[1L] <= 0L) return(NULL)
+    match_mat = attr(x, "capture.start") # matrix seeded with the correct dimensions
+    match_mat[] = substring(news[idx], match_mat, match_mat + attr(x, "capture.length") - 1L)
+    match_df = data.frame(match_mat)
+    match_df$line_number = idx
+    match_df
+  }))
+  matched = gh_link_metadata$md_number == gh_link_metadata$link_number
+  if (all(matched)) return(FALSE)
+
+  cat(sep = "", with(gh_link_metadata[!matched, ], sprintf(
+    "In line %d, link pointing to %s %s is written #%s\n",
+    line_number, link_type, link_number, md_number
+  )))
+  return(TRUE)
+}
+
 any_error = FALSE
 for (news in list.files(pattern = "NEWS")) {
   cat(sprintf("Checking NEWS file %s...\n", news))
   news_lines = readLines("NEWS.md")
   any_error = any_error || check_section_numbering(news_lines)
+  any_error = any_error || check_gh_links(news_lines)
 }
 if (any_error) stop("Please fix the NEWS issues above.")
diff --git a/NEWS.md b/NEWS.md
index ee587597d..14afa97e3 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -346,7 +346,7 @@ This feature resolves [#4387](https://github.com/Rdatatable/data.table/issues/43
 
 23. `DT[, head(.SD,n), by=grp]` and `tail` are now optimized when `n>1`, [#5060](https://github.com/Rdatatable/data.table/issues/5060) [#523](https://github.com/Rdatatable/data.table/issues/523#issuecomment-162934391). `n==1` was already optimized. Thanks to Jan Gorecki and Michael Young for requesting, and Benjamin Schwendinger for the PR.
 
-24. `setcolorder()` gains `before=` and `after=`, [#4385](https://github.com/Rdatatable/data.table/issues/4358). Thanks to Matthias Gomolka for the request, and both Benjamin Schwendinger and Xianghui Dong for implementing. Also thanks to Manuel López-Ibáñez for testing dev and mentioning needed documentation before release.
+24. `setcolorder()` gains `before=` and `after=`, [#4358](https://github.com/Rdatatable/data.table/issues/4358). Thanks to Matthias Gomolka for the request, and both Benjamin Schwendinger and Xianghui Dong for implementing. Also thanks to Manuel López-Ibáñez for testing dev and mentioning needed documentation before release.
 
 25. `base::droplevels()` gains a fast method for `data.table`, [#647](https://github.com/Rdatatable/data.table/issues/647). Thanks to Steve Lianoglou for requesting, Boniface Kamgang and Martin Binder for testing, and Jan Gorecki and Benjamin Schwendinger for the PR. `fdroplevels()` for use on vectors has also been added.