From 40d4fc59501a51aaf4730a1ba9a52cbcf52982f4 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 3 Aug 2024 05:01:19 +0000 Subject: [PATCH 1/5] Add a linter for calls to omp_set_num_threads --- .ci/linters/c/omp_set_num_threads_linter.R | 11 +++++++++++ .dev/CRAN_Release.cmd | 3 --- .github/workflows/code-quality.yaml | 7 ++++--- 3 files changed, 15 insertions(+), 6 deletions(-) create mode 100644 .ci/linters/c/omp_set_num_threads_linter.R diff --git a/.ci/linters/c/omp_set_num_threads_linter.R b/.ci/linters/c/omp_set_num_threads_linter.R new file mode 100644 index 000000000..6a94b2527 --- /dev/null +++ b/.ci/linters/c/omp_set_num_threads_linter.R @@ -0,0 +1,11 @@ +# Ensure no calls to omp_set_num_threads() [to avoid affecting other packages and base R] +# Only comments referring to it should be in openmp-utils.c +omp_set_num_threads_linter = function(c_file) { + processed_lines = system2("gcc", c("-fpreprocessed", "-E", "-xc", "-"), stdin=c_file, stdout=TRUE, stderr=FALSE) + idx = grep("omp_set_num_threads", processed_lines, fixed = TRUE) + if (!length(idx)) return() + stop(sprintf( + "In %s, found 'omp_set_num_threads() usage, which could affect other packages and base R:\n%s", + c_file, paste0(" ", format(idx), ":", processed_lines[idx], collapse = "\n") + )) +} diff --git a/.dev/CRAN_Release.cmd b/.dev/CRAN_Release.cmd index ebf17bcd2..eeddd7271 100644 --- a/.dev/CRAN_Release.cmd +++ b/.dev/CRAN_Release.cmd @@ -53,9 +53,6 @@ grep -RI --exclude-dir=".git" --exclude="*.md" --exclude="*~" --color='auto' -P # Unicode is now ok. This unicode in tests.Rraw is passing on CRAN. grep -RI --exclude-dir=".git" --exclude="*.md" --exclude="*~" --color='auto' -n "[\]u[0-9]" ./ -# Ensure no calls to omp_set_num_threads() [to avoid affecting other packages and base R] -# Only comments referring to it should be in openmp-utils.c -grep omp_set_num_threads ./src/* # Ensure no calls to omp_set_nested() as i) it's hard to fully honor OMP_THREAD_LIMIT as required by CRAN, and # ii) a simpler non-nested approach is always preferable if possible, as has been the case so far diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index f9066eacd..960315e78 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -40,9 +40,10 @@ jobs: - uses: r-lib/actions/setup-r@v2 - name: Lint run: | - for (f in list.files('.ci/linters/c', full.names=TRUE)) source(f) - for (f in list.files('src', pattern='[.]c$', full.names=TRUE)) { - alloc_linter(f) + linter_env = new.env() + for (f in list.files('.ci/linters/c', full.names=TRUE)) sys.source(f, linter_env) + for (f in list.files('src', pattern='[.][ch]$', full.names=TRUE)) { + for (linter in ls(linter_env)) linter_env[[linter]](f) # TODO(#6272): Incorporate more checks from CRAN_Release } shell: Rscript {0} From 85f1c7e8a068c98189830d9109f22ac4286d8a12 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 3 Aug 2024 05:04:53 +0000 Subject: [PATCH 2/5] simplify, comment --- .ci/linters/c/omp_set_num_threads_linter.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.ci/linters/c/omp_set_num_threads_linter.R b/.ci/linters/c/omp_set_num_threads_linter.R index 6a94b2527..a707d1748 100644 --- a/.ci/linters/c/omp_set_num_threads_linter.R +++ b/.ci/linters/c/omp_set_num_threads_linter.R @@ -1,11 +1,13 @@ + # Ensure no calls to omp_set_num_threads() [to avoid affecting other packages and base R] # Only comments referring to it should be in openmp-utils.c omp_set_num_threads_linter = function(c_file) { - processed_lines = system2("gcc", c("-fpreprocessed", "-E", "-xc", "-"), stdin=c_file, stdout=TRUE, stderr=FALSE) + # strip comments, we only care if the function appears in actual code. + processed_lines = system2("gcc", c("-fpreprocessed", "-E", "-xc", "-", c_file), stdout=TRUE, stderr=FALSE) idx = grep("omp_set_num_threads", processed_lines, fixed = TRUE) if (!length(idx)) return() stop(sprintf( - "In %s, found 'omp_set_num_threads() usage, which could affect other packages and base R:\n%s", + "In %s, found omp_set_num_threads() usage, which could affect other packages and base R:\n%s", c_file, paste0(" ", format(idx), ":", processed_lines[idx], collapse = "\n") )) } From 6ed7ab9689b1ce907a2a7be590aed3651ef9f386 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 2 Aug 2024 22:27:36 -0700 Subject: [PATCH 3/5] refactor --- .ci/linters/c/alloc_linter.R | 6 +++--- .ci/linters/c/omp_set_num_threads_linter.R | 8 +++----- .github/workflows/code-quality.yaml | 4 +++- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.ci/linters/c/alloc_linter.R b/.ci/linters/c/alloc_linter.R index a94bc27a9..cd040f4c2 100644 --- a/.ci/linters/c/alloc_linter.R +++ b/.ci/linters/c/alloc_linter.R @@ -2,8 +2,8 @@ # More specifically, given that this is an AST-ignorant checker, # 1. Find groups of malloc()/calloc() calls # 2. Check the next line for a check like 'if (!x || !y)' -alloc_linter = function(c_file) { - lines <- readLines(c_file) +alloc_linter = function(c_obj) { + lines <- c_obj$lines # Be a bit more precise to avoid mentions in comments alloc_lines <- grep(R"{=\s*([(]\w+\s*[*][)])?[mc]alloc[(]}", lines) if (!length(alloc_lines)) return() @@ -27,7 +27,7 @@ alloc_linter = function(c_file) { # but not immediately, so use 'NOCHECK' to escape. if (!grepl(check_regex, check_line) && !grepl("NOCHECK", check_line, fixed=TRUE)) { bad_lines_idx <- seq(alloc_lines[grp_idx[1L]], length.out=length(grp_idx)+1L) - cat("FILE: ", c_file, "; LINES: ", head(bad_lines_idx, 1L), "-", tail(bad_lines_idx, 1L), "\n", sep="") + cat("FILE: ", c_obj$path, "; LINES: ", head(bad_lines_idx, 1L), "-", tail(bad_lines_idx, 1L), "\n", sep="") writeLines(lines[bad_lines_idx]) cat(strrep("-", max(nchar(lines[bad_lines_idx]))), "\n", sep="") stop("Expected the malloc()/calloc() usage above to be followed immediately by error checking.", call.=FALSE) diff --git a/.ci/linters/c/omp_set_num_threads_linter.R b/.ci/linters/c/omp_set_num_threads_linter.R index a707d1748..f4a89f2b4 100644 --- a/.ci/linters/c/omp_set_num_threads_linter.R +++ b/.ci/linters/c/omp_set_num_threads_linter.R @@ -1,13 +1,11 @@ - # Ensure no calls to omp_set_num_threads() [to avoid affecting other packages and base R] # Only comments referring to it should be in openmp-utils.c -omp_set_num_threads_linter = function(c_file) { +omp_set_num_threads_linter = function(c_obj) { # strip comments, we only care if the function appears in actual code. - processed_lines = system2("gcc", c("-fpreprocessed", "-E", "-xc", "-", c_file), stdout=TRUE, stderr=FALSE) - idx = grep("omp_set_num_threads", processed_lines, fixed = TRUE) + idx = grep("omp_set_num_threads", c_obj$preprocessed, fixed = TRUE) if (!length(idx)) return() stop(sprintf( "In %s, found omp_set_num_threads() usage, which could affect other packages and base R:\n%s", - c_file, paste0(" ", format(idx), ":", processed_lines[idx], collapse = "\n") + c_obj$path, paste0(" ", format(idx), ":", c_obj$preprocessed[idx], collapse = "\n") )) } diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 960315e78..910b17e6c 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -43,7 +43,9 @@ jobs: linter_env = new.env() for (f in list.files('.ci/linters/c', full.names=TRUE)) sys.source(f, linter_env) for (f in list.files('src', pattern='[.][ch]$', full.names=TRUE)) { - for (linter in ls(linter_env)) linter_env[[linter]](f) + c_obj = list(path = f, lines = readLines(f)) + c_obj$preprocessed = system2("gcc", c("-fpreprocessed", "-E", "-xc", "-", f), stdout=TRUE, stderr=FALSE) + for (linter in ls(linter_env)) linter_env[[linter]](c_obj) # TODO(#6272): Incorporate more checks from CRAN_Release } shell: Rscript {0} From 560fd51998fcbd5c77869577855f5b7411bb9349 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 4 Aug 2024 23:53:22 -0700 Subject: [PATCH 4/5] fix issue with '-xc -' args to gcc again --- .github/workflows/code-quality.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 910b17e6c..6d3325ce3 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -44,7 +44,7 @@ jobs: for (f in list.files('.ci/linters/c', full.names=TRUE)) sys.source(f, linter_env) for (f in list.files('src', pattern='[.][ch]$', full.names=TRUE)) { c_obj = list(path = f, lines = readLines(f)) - c_obj$preprocessed = system2("gcc", c("-fpreprocessed", "-E", "-xc", "-", f), stdout=TRUE, stderr=FALSE) + c_obj$preprocessed = system2("gcc", c("-fpreprocessed", "-E", f), stdout=TRUE, stderr=FALSE) for (linter in ls(linter_env)) linter_env[[linter]](c_obj) # TODO(#6272): Incorporate more checks from CRAN_Release } From 35b178d0d89477a18750568f2341490bf3a7714c Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 19 Aug 2024 09:04:49 -0700 Subject: [PATCH 5/5] Use '=' --- .ci/linters/c/alloc_linter.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.ci/linters/c/alloc_linter.R b/.ci/linters/c/alloc_linter.R index cd040f4c2..8fb6b2dd0 100644 --- a/.ci/linters/c/alloc_linter.R +++ b/.ci/linters/c/alloc_linter.R @@ -3,12 +3,12 @@ # 1. Find groups of malloc()/calloc() calls # 2. Check the next line for a check like 'if (!x || !y)' alloc_linter = function(c_obj) { - lines <- c_obj$lines + lines = c_obj$lines # Be a bit more precise to avoid mentions in comments - alloc_lines <- grep(R"{=\s*([(]\w+\s*[*][)])?[mc]alloc[(]}", lines) + alloc_lines = grep(R"{=\s*([(]\w+\s*[*][)])?[mc]alloc[(]}", lines) if (!length(alloc_lines)) return() # int *tmp=(int*)malloc(...); or just int tmp=malloc(...); - alloc_keys <- lines[alloc_lines] |> + alloc_keys = lines[alloc_lines] |> strsplit(R"(\s*=\s*)") |> vapply(head, 1L, FUN.VALUE="") |> trimws() |> @@ -16,17 +16,17 @@ alloc_linter = function(c_obj) { gsub(pattern = "[*]\\s*(restrict\\s*)?", replacement = "*") |> strsplit("*", fixed=TRUE) |> vapply(tail, 1L, FUN.VALUE="") - alloc_grp_id <- cumsum(c(TRUE, diff(alloc_lines) != 1L)) + alloc_grp_id = cumsum(c(TRUE, diff(alloc_lines) != 1L)) # execute by group tapply(seq_along(alloc_lines), alloc_grp_id, function(grp_idx) { - keys_regex <- paste0("\\s*!\\s*", alloc_keys[grp_idx], "\\s*", collapse = "[|][|]") - check_regex <- paste0("if\\s*\\(", keys_regex) - check_line <- lines[alloc_lines[tail(grp_idx, 1L)] + 1L] + keys_regex = paste0("\\s*!\\s*", alloc_keys[grp_idx], "\\s*", collapse = "[|][|]") + check_regex = paste0("if\\s*\\(", keys_regex) + check_line = lines[alloc_lines[tail(grp_idx, 1L)] + 1L] # Rarely (once in fread.c as of initialization), error checking is handled # but not immediately, so use 'NOCHECK' to escape. if (!grepl(check_regex, check_line) && !grepl("NOCHECK", check_line, fixed=TRUE)) { - bad_lines_idx <- seq(alloc_lines[grp_idx[1L]], length.out=length(grp_idx)+1L) + bad_lines_idx = seq(alloc_lines[grp_idx[1L]], length.out=length(grp_idx)+1L) cat("FILE: ", c_obj$path, "; LINES: ", head(bad_lines_idx, 1L), "-", tail(bad_lines_idx, 1L), "\n", sep="") writeLines(lines[bad_lines_idx]) cat(strrep("-", max(nchar(lines[bad_lines_idx]))), "\n", sep="")