diff --git a/.ci/linters/md/news_linter.R b/.ci/linters/md/news_linter.R new file mode 100644 index 000000000..5eeb302a3 --- /dev/null +++ b/.ci/linters/md/news_linter.R @@ -0,0 +1,61 @@ +any_mismatch = FALSE + +# ensure that numbered list in each section is in sequence +check_section_numbering = function(news) { + # plain '#' catches some examples + sections = grep("^#+ [A-Z]", news) + entries = grep("^[0-9]+[.]", news) + entry_value = as.integer(gsub("^([0-9]+)[.].*", "\\1", news[entries])) + section_id = findInterval(entries, sections) + + any_mismatch = FALSE + for (id in unique(section_id)) { + section_entries = entry_value[section_id == id] + intended_value = seq_along(section_entries) + matched = section_entries == intended_value + if (all(matched)) next + any_mismatch = TRUE + section_header = news[sections[id]] + cat(sprintf( + "In section '%s' (line %d), bad numbering:\n%s\n", + section_header, sections[id], + paste0(" [", section_entries[!matched], " --> ", intended_value[!matched], "]", collapse="\n") + )) + } + return(any_mismatch) +} + +# ensure that GitHub link text & URL actually agree +check_gh_links = function(news) { + gh_links_info = gregexpr( + "\\[#(?[0-9]+)\\]\\(https://github.com/Rdatatable/data.table/(?[^/]+)/(?[0-9]+)\\)", + news, + perl=TRUE # required for within-group indices + ) + gh_link_metadata = do.call(rbind, lapply(seq_along(gh_links_info), function(idx) { + x = gh_links_info[[idx]] + if (x[1L] <= 0L) return(NULL) + match_mat = attr(x, "capture.start") # matrix seeded with the correct dimensions + match_mat[] = substring(news[idx], match_mat, match_mat + attr(x, "capture.length") - 1L) + match_df = data.frame(match_mat) + match_df$line_number = idx + match_df + })) + matched = gh_link_metadata$md_number == gh_link_metadata$link_number + if (all(matched)) return(FALSE) + + cat(sep = "", with(gh_link_metadata[!matched, ], sprintf( + "In line %d, link pointing to %s %s is written #%s\n", + line_number, link_type, link_number, md_number + ))) + return(TRUE) +} + +any_error = FALSE +for (news in list.files(pattern = "NEWS")) { + cat(sprintf("Checking NEWS file %s...\n", news)) + news_lines = readLines("NEWS.md") + any_error = any_error || check_section_numbering(news_lines) + any_error = any_error || check_gh_links(news_lines) +} +if (any_error) stop("Please fix the NEWS issues above.") diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index b38a7929c..21b416dc7 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -70,3 +70,8 @@ jobs: } cat("All translation quality checks completed successfully!\n") shell: Rscript {0} + + lint-md: + - name: Lint + run: | + for (f in list.files('.ci/linters/md', full.names=TRUE)) source(f) diff --git a/NEWS.md b/NEWS.md index 29047f1a1..c7aec62b1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -351,7 +351,7 @@ 23. `DT[, head(.SD,n), by=grp]` and `tail` are now optimized when `n>1`, [#5060](https://github.com/Rdatatable/data.table/issues/5060) [#523](https://github.com/Rdatatable/data.table/issues/523#issuecomment-162934391). `n==1` was already optimized. Thanks to Jan Gorecki and Michael Young for requesting, and Benjamin Schwendinger for the PR. -24. `setcolorder()` gains `before=` and `after=`, [#4385](https://github.com/Rdatatable/data.table/issues/4358). Thanks to Matthias Gomolka for the request, and both Benjamin Schwendinger and Xianghui Dong for implementing. Also thanks to Manuel López-Ibáñez for testing dev and mentioning needed documentation before release. +24. `setcolorder()` gains `before=` and `after=`, [#4358](https://github.com/Rdatatable/data.table/issues/4358). Thanks to Matthias Gomolka for the request, and both Benjamin Schwendinger and Xianghui Dong for implementing. Also thanks to Manuel López-Ibáñez for testing dev and mentioning needed documentation before release. 25. `base::droplevels()` gains a fast method for `data.table`, [#647](https://github.com/Rdatatable/data.table/issues/647). Thanks to Steve Lianoglou for requesting, Boniface Kamgang and Martin Binder for testing, and Jan Gorecki and Benjamin Schwendinger for the PR. `fdroplevels()` for use on vectors has also been added.