Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

control compression level for fwrite with gzip #5513

Closed
A contributor wants to merge 2 commits (the author and the source/target branch names were not captured in this page text)
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion R/fwrite.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
buffMB=8, nThread=getDTthreads(verbose),
showProgress=getOption("datatable.showProgress", interactive()),
compress = c("auto", "none", "gzip"),
gzipLevel = -1:9,
yaml = FALSE,
bom = FALSE,
verbose=getOption("datatable.verbose", FALSE),
Expand All @@ -20,6 +21,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
}
if (missing(qmethod)) qmethod = qmethod[1L]
if (missing(compress)) compress = compress[1L]
if (missing(gzipLevel)) gzipLevel = gzipLevel[1L]
if (missing(dateTimeAs)) { dateTimeAs = dateTimeAs[1L] }
else if (length(dateTimeAs)>1L) stopf("dateTimeAs must be a single string")
dateTimeAs = chmatch(dateTimeAs, c("ISO","squash","epoch","write.csv"))-1L
Expand All @@ -34,6 +36,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
scipen = if (is.numeric(scipen)) as.integer(scipen) else 0L
buffMB = as.integer(buffMB)
nThread = as.integer(nThread)
gzipLevel = as.integer(gzipLevel)
# write.csv default is 'double' so fwrite follows suit. write.table's default is 'escape'
# validate arguments
if (is.matrix(x)) { # coerce to data.table if input object is matrix
Expand All @@ -49,6 +52,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
is.character(eol) && length(eol)==1L,
length(qmethod) == 1L && qmethod %chin% c("double", "escape"),
length(compress) == 1L && compress %chin% c("auto", "none", "gzip"),
length(gzipLevel) == 1L && -1L<=gzipLevel && gzipLevel<=9L,
isTRUEorFALSE(col.names), isTRUEorFALSE(append), isTRUEorFALSE(row.names),
isTRUEorFALSE(verbose), isTRUEorFALSE(showProgress), isTRUEorFALSE(logical01),
isTRUEorFALSE(bom),
Expand Down Expand Up @@ -111,7 +115,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
file = enc2native(file) # CfwriteR cannot handle UTF-8 if that is not the native encoding, see #3078.
.Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append,
row.names, col.names, logical01, scipen, dateTimeAs, buffMB, nThread,
showProgress, is_gzip, bom, yaml, verbose, encoding)
showProgress, is_gzip, gzipLevel, bom, yaml, verbose, encoding)
invisible()
}

2 changes: 1 addition & 1 deletion src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ SEXP chmatch_R(SEXP, SEXP, SEXP);
SEXP chmatchdup_R(SEXP, SEXP, SEXP);
SEXP chin_R(SEXP, SEXP);
SEXP freadR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP rbindlist(SEXP, SEXP, SEXP, SEXP);
SEXP setlistelt(SEXP, SEXP, SEXP);
SEXP address(SEXP);
Expand Down
4 changes: 3 additions & 1 deletion src/fwrite.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ static bool qmethodEscape=false; // when quoting fields, how to escape dou
static int scipen;
static bool squashDateTime=false; // 0=ISO(yyyy-mm-dd) 1=squash(yyyymmdd)
static bool verbose=false;
static int gzip_level;

extern const char *getString(const void *, int64_t);
extern int getStringLen(const void *, int64_t);
Expand Down Expand Up @@ -567,7 +568,7 @@ int init_stream(z_stream *stream) {
stream->opaque = Z_NULL;

// windowBits = 31: 15 (largest window, 2^15 bytes) + 16 (write a gzip header and trailer instead of a zlib wrapper)
int err = deflateInit2(stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 31, 8, Z_DEFAULT_STRATEGY);
int err = deflateInit2(stream, gzip_level==-1 ? Z_DEFAULT_COMPRESSION : gzip_level, Z_DEFLATED, 31, 8, Z_DEFAULT_STRATEGY);
return err; // # nocov
}

Expand Down Expand Up @@ -600,6 +601,7 @@ void fwriteMain(fwriteMainArgs args)
scipen = args.scipen;
doQuote = args.doQuote;
verbose = args.verbose;
gzip_level = args.gzip_level;

// When NA is a non-empty string, then we must quote all string fields in case they contain the na string
// na is recommended to be empty, though
Expand Down
1 change: 1 addition & 0 deletions src/fwrite.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ typedef struct fwriteMainArgs
int nth;
bool showProgress;
bool is_gzip;
int gzip_level;
bool bom;
const char *yaml;
bool verbose;
Expand Down
2 changes: 2 additions & 0 deletions src/fwriteR.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ SEXP fwriteR(
SEXP nThread_Arg,
SEXP showProgress_Arg,
SEXP is_gzip_Arg,
SEXP gzip_level_Arg,
SEXP bom_Arg,
SEXP yaml_Arg,
SEXP verbose_Arg,
Expand All @@ -177,6 +178,7 @@ SEXP fwriteR(

fwriteMainArgs args = {0}; // {0} to quieten valgrind's uninitialized, #4639
args.is_gzip = LOGICAL(is_gzip_Arg)[0];
args.gzip_level = INTEGER(gzip_level_Arg)[0];
args.bom = LOGICAL(bom_Arg)[0];
args.yaml = CHAR(STRING_ELT(yaml_Arg, 0));
args.verbose = LOGICAL(verbose_Arg)[0];
Expand Down