diff --git a/.ci/.lintr.R b/.ci/.lintr.R index 6659efbc8..2481ecec9 100644 --- a/.ci/.lintr.R +++ b/.ci/.lintr.R @@ -1,5 +1,5 @@ dt_linters = new.env() -for (f in list.files('.ci/linters', full.names=TRUE)) sys.source(f, dt_linters) +for (f in list.files('.ci/linters/r', full.names=TRUE)) sys.source(f, dt_linters) rm(f) # NB: Could do this inside the linter definition, this separation makes those files more standardized diff --git a/.ci/linters/c/alloc_linter.R b/.ci/linters/c/alloc_linter.R new file mode 100644 index 000000000..a94bc27a9 --- /dev/null +++ b/.ci/linters/c/alloc_linter.R @@ -0,0 +1,36 @@ +# Ensure that we check the result of malloc()/calloc() for success +# More specifically, given that this is an AST-ignorant checker, +# 1. Find groups of malloc()/calloc() calls +# 2. Check the next line for a check like 'if (!x || !y)' +alloc_linter = function(c_file) { + lines <- readLines(c_file) + # Be a bit more precise to avoid mentions in comments + alloc_lines <- grep(R"{=\s*([(]\w+\s*[*][)])?[mc]alloc[(]}", lines) + if (!length(alloc_lines)) return() + # int *tmp=(int*)malloc(...); or just int tmp=malloc(...); + alloc_keys <- lines[alloc_lines] |> + strsplit(R"(\s*=\s*)") |> + vapply(head, 1L, FUN.VALUE="") |> + trimws() |> + # just normalize the more exotic assignments, namely 'type *restrict key = ...' + gsub(pattern = "[*]\\s*(restrict\\s*)?", replacement = "*") |> + strsplit("*", fixed=TRUE) |> + vapply(tail, 1L, FUN.VALUE="") + alloc_grp_id <- cumsum(c(TRUE, diff(alloc_lines) != 1L)) + + # execute by group + tapply(seq_along(alloc_lines), alloc_grp_id, function(grp_idx) { + keys_regex <- paste0("\\s*!\\s*", alloc_keys[grp_idx], "\\s*", collapse = "[|][|]") + check_regex <- paste0("if\\s*\\(", keys_regex) + check_line <- lines[alloc_lines[tail(grp_idx, 1L)] + 1L] + # Rarely (once in fread.c as of initialization), error checking is handled + # but not immediately, so use 'NOCHECK' to escape. + if (!grepl(check_regex, check_line) && !grepl("NOCHECK", check_line, fixed=TRUE)) { + bad_lines_idx <- seq(alloc_lines[grp_idx[1L]], length.out=length(grp_idx)+1L) + cat("FILE: ", c_file, "; LINES: ", head(bad_lines_idx, 1L), "-", tail(bad_lines_idx, 1L), "\n", sep="") + writeLines(lines[bad_lines_idx]) + cat(strrep("-", max(nchar(lines[bad_lines_idx]))), "\n", sep="") + stop("Expected the malloc()/calloc() usage above to be followed immediately by error checking.", call.=FALSE) + } + }) +} diff --git a/.ci/linters/dt_lengths_linter.R b/.ci/linters/r/dt_lengths_linter.R similarity index 100% rename from .ci/linters/dt_lengths_linter.R rename to .ci/linters/r/dt_lengths_linter.R diff --git a/.ci/linters/dt_test_literal_linter.R b/.ci/linters/r/dt_test_literal_linter.R similarity index 100% rename from .ci/linters/dt_test_literal_linter.R rename to .ci/linters/r/dt_test_literal_linter.R diff --git a/.github/workflows/lint.yaml b/.github/workflows/code-quality.yaml similarity index 59% rename from .github/workflows/lint.yaml rename to .github/workflows/code-quality.yaml index d5a4ef466..f9066eacd 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/code-quality.yaml @@ -6,10 +6,10 @@ on: branches: - master -name: lint +name: code-quality jobs: - lint: + lint-r: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} @@ -33,3 +33,16 @@ jobs: env: LINTR_ERROR_ON_LINT: true R_LINTR_LINTER_FILE: .ci/.lintr + lint-c: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: r-lib/actions/setup-r@v2 + - name: Lint + run: | + for (f in list.files('.ci/linters/c', full.names=TRUE)) source(f) + for (f in list.files('src', pattern='[.]c$', full.names=TRUE)) { + alloc_linter(f) + # TODO(#6272): Incorporate more checks from CRAN_Release + } + shell: Rscript {0} diff --git a/NEWS.md b/NEWS.md index 262aace17..ab412e052 100644 --- a/NEWS.md +++ b/NEWS.md @@ -116,6 +116,8 @@ 21. Refactored some non-API calls to R macros for S4 objects (#6180)[https://github.com/Rdatatable/data.table/issues/6180]. There should be no user-visible change. Thanks to various R users & R core for pushing to have a clearer definition of "API" for R, and thanks @MichaelChirico for implementing here. +22. C code was unified more in how failures to allocate memory (`malloc()`/`calloc()`) are handled, (#1115)[https://github.com/Rdatatable/data.table/issues/1115]. No OOM issues were reported, as these regions of code typically request relatively small blocks of memory, but it is good to handle memory pressure consistently. Thanks @elfring for the report and @MichaelChirico for the clean-up effort and future-proofing linter. + ## TRANSLATIONS 1. Fix a typo in a Mandarin translation of an error message that was hiding the actual error message, [#6172](https://github.com/Rdatatable/data.table/issues/6172). Thanks @trafficfan for the report and @MichaelChirico for the fix. diff --git a/src/assign.c b/src/assign.c index 3b7ac5faa..6eb0866e4 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1247,7 +1247,8 @@ void savetl_init(void) { nalloc = 100; saveds = (SEXP *)malloc(nalloc * sizeof(SEXP)); savedtl = (R_len_t *)malloc(nalloc * sizeof(R_len_t)); - if (saveds==NULL || savedtl==NULL) { + if (!saveds || !savedtl) { + free(saveds); free(savedtl); savetl_end(); // # nocov error(_("Failed to allocate initial %d items in savetl_init"), nalloc); // # nocov } diff --git a/src/bmerge.c b/src/bmerge.c index 2ce69904e..6cb20c683 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -129,8 +129,6 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP isorted, SEXP retFirst = Calloc(anslen, int); // anslen is set above retLength = Calloc(anslen, int); retIndex = Calloc(anslen, int); - if (retFirst==NULL || retLength==NULL || retIndex==NULL) - error(_("Internal error in allocating memory for non-equi join")); // # nocov // initialise retIndex here directly, as next loop is meant for both equi and non-equi joins for (int j=0; j A(TL=1),B(2),C(3),D(4),E(5) => dupMap 1 2 3 5 6 | 8 7 4 // dupLink 7 8 | 6 (blank=0) - int *counts = (int *)calloc(nuniq, sizeof(int)); unsigned int mapsize = tablelen+nuniq; // lto compilation warning #5760 // +nuniq to store a 0 at the end of each group + int *counts = (int *)calloc(nuniq, sizeof(int)); int *map = (int *)calloc(mapsize, sizeof(int)); if (!counts || !map) { + free(counts); free(map); // # nocov start for (int i=0; i0) savetl(s);