Skip to content

Commit

Permalink
Refactor C linters for better extensibility/reuse (#6346)
Browse files Browse the repository at this point in the history
Most importantly, we can run `readLines()` + preprocess the file only once, then pass that output to each linter.
  • Loading branch information
MichaelChirico authored Aug 19, 2024
1 parent fbdfb62 commit 8af7b43
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 16 deletions.
20 changes: 10 additions & 10 deletions .ci/linters/c/alloc_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,32 @@
# More specifically, given that this is an AST-ignorant checker,
# 1. Find groups of malloc()/calloc() calls
# 2. Check the next line for a check like 'if (!x || !y)'
alloc_linter = function(c_file) {
lines <- readLines(c_file)
alloc_linter = function(c_obj) {
lines = c_obj$lines
# Be a bit more precise to avoid mentions in comments
alloc_lines <- grep(R"{=\s*([(]\w+\s*[*][)])?[mc]alloc[(]}", lines)
alloc_lines = grep(R"{=\s*([(]\w+\s*[*][)])?[mc]alloc[(]}", lines)
if (!length(alloc_lines)) return()
# int *tmp=(int*)malloc(...); or just int tmp=malloc(...);
alloc_keys <- lines[alloc_lines] |>
alloc_keys = lines[alloc_lines] |>
strsplit(R"(\s*=\s*)") |>
vapply(head, 1L, FUN.VALUE="") |>
trimws() |>
# just normalize the more exotic assignments, namely 'type *restrict key = ...'
gsub(pattern = "[*]\\s*(restrict\\s*)?", replacement = "*") |>
strsplit("*", fixed=TRUE) |>
vapply(tail, 1L, FUN.VALUE="")
alloc_grp_id <- cumsum(c(TRUE, diff(alloc_lines) != 1L))
alloc_grp_id = cumsum(c(TRUE, diff(alloc_lines) != 1L))

# execute by group
tapply(seq_along(alloc_lines), alloc_grp_id, function(grp_idx) {
keys_regex <- paste0("\\s*!\\s*", alloc_keys[grp_idx], "\\s*", collapse = "[|][|]")
check_regex <- paste0("if\\s*\\(", keys_regex)
check_line <- lines[alloc_lines[tail(grp_idx, 1L)] + 1L]
keys_regex = paste0("\\s*!\\s*", alloc_keys[grp_idx], "\\s*", collapse = "[|][|]")
check_regex = paste0("if\\s*\\(", keys_regex)
check_line = lines[alloc_lines[tail(grp_idx, 1L)] + 1L]
# Rarely (once in fread.c as of initialization), error checking is handled
# but not immediately, so use 'NOCHECK' to escape.
if (!grepl(check_regex, check_line) && !grepl("NOCHECK", check_line, fixed=TRUE)) {
bad_lines_idx <- seq(alloc_lines[grp_idx[1L]], length.out=length(grp_idx)+1L)
cat("FILE: ", c_file, "; LINES: ", head(bad_lines_idx, 1L), "-", tail(bad_lines_idx, 1L), "\n", sep="")
bad_lines_idx = seq(alloc_lines[grp_idx[1L]], length.out=length(grp_idx)+1L)
cat("FILE: ", c_obj$path, "; LINES: ", head(bad_lines_idx, 1L), "-", tail(bad_lines_idx, 1L), "\n", sep="")
writeLines(lines[bad_lines_idx])
cat(strrep("-", max(nchar(lines[bad_lines_idx]))), "\n", sep="")
stop("Expected the malloc()/calloc() usage above to be followed immediately by error checking.", call.=FALSE)
Expand Down
8 changes: 3 additions & 5 deletions .ci/linters/c/omp_set_num_threads_linter.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@

# Ensure no calls to omp_set_num_threads() [to avoid affecting other packages and base R]
# Only comments referring to it should be in openmp-utils.c
omp_set_num_threads_linter = function(c_file) {
omp_set_num_threads_linter = function(c_obj) {
# strip comments, we only care if the function appears in actual code.
processed_lines = system2("gcc", c("-fpreprocessed", "-E", c_file), stdout=TRUE, stderr=FALSE)
idx = grep("omp_set_num_threads", processed_lines, fixed = TRUE)
idx = grep("omp_set_num_threads", c_obj$preprocessed, fixed = TRUE)
if (!length(idx)) return()
stop(sprintf(
"In %s, found omp_set_num_threads() usage, which could affect other packages and base R:\n%s",
c_file, paste0(" ", format(idx), ":", processed_lines[idx], collapse = "\n")
c_obj$path, paste0(" ", format(idx), ":", c_obj$preprocessed[idx], collapse = "\n")
))
}
4 changes: 3 additions & 1 deletion .github/workflows/code-quality.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ jobs:
linter_env = new.env()
for (f in list.files('.ci/linters/c', full.names=TRUE)) sys.source(f, linter_env)
for (f in list.files('src', pattern='[.][ch]$', full.names=TRUE)) {
for (linter in ls(linter_env)) linter_env[[linter]](f)
c_obj = list(path = f, lines = readLines(f))
c_obj$preprocessed = system2("gcc", c("-fpreprocessed", "-E", f), stdout=TRUE, stderr=FALSE)
for (linter in ls(linter_env)) linter_env[[linter]](c_obj)
# TODO(#6272): Incorporate more checks from CRAN_Release
}
shell: Rscript {0}
Expand Down

0 comments on commit 8af7b43

Please sign in to comment.