diff --git a/R/RcppExports.R b/R/RcppExports.R
index 77566d09a..43a519d5c 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -340,6 +340,10 @@ set_sst <- function(sharedStrings) {
.Call(`_openxlsx2_set_sst`, sharedStrings)
}
+write_worksheet_slim <- function(sheet_data, prior, post, fl) {
+ invisible(.Call(`_openxlsx2_write_worksheet_slim`, sheet_data, prior, post, fl))
+}
+
write_worksheet <- function(prior, post, sheet_data) {
.Call(`_openxlsx2_write_worksheet`, prior, post, sheet_data)
}
diff --git a/R/class-workbook.R b/R/class-workbook.R
index 0c9ac0202..bb5189bf4 100644
--- a/R/class-workbook.R
+++ b/R/class-workbook.R
@@ -9788,41 +9788,64 @@ wbWorkbook <- R6::R6Class(
prior <- self$worksheets[[i]]$get_prior_sheet_data()
post <- self$worksheets[[i]]$get_post_sheet_data()
- if (!is.null(self$worksheets[[i]]$sheet_data$cc)) {
+ use_pugixml_export <- getOption("openxlsx2.export_with_pugi", default = TRUE)
- self$worksheets[[i]]$sheet_data$cc$r <- with(
- self$worksheets[[i]]$sheet_data$cc,
- stringi::stri_join(c_r, row_r)
- )
- cc <- self$worksheets[[i]]$sheet_data$cc
- # prepare data for output
+ if (use_pugixml_export) {
+ # failsaves. check that all rows and cells
+ # are available and in the correct order
+ if (!is.null(self$worksheets[[i]]$sheet_data$cc)) {
+
+ self$worksheets[[i]]$sheet_data$cc$r <- with(
+ self$worksheets[[i]]$sheet_data$cc,
+ stringi::stri_join(c_r, row_r)
+ )
+ cc <- self$worksheets[[i]]$sheet_data$cc
+ # prepare data for output
- # there can be files, where row_attr is incomplete because a row
- # is lacking any attributes (presumably was added before saving)
- # still row_attr is what we want!
+ # there can be files, where row_attr is incomplete because a row
+ # is lacking any attributes (presumably was added before saving)
+ # still row_attr is what we want!
- rows_attr <- self$worksheets[[i]]$sheet_data$row_attr
- self$worksheets[[i]]$sheet_data$row_attr <- rows_attr[order(as.numeric(rows_attr[, "r"])), ]
+ rows_attr <- self$worksheets[[i]]$sheet_data$row_attr
+ self$worksheets[[i]]$sheet_data$row_attr <- rows_attr[order(as.numeric(rows_attr[, "r"])), ]
- cc_rows <- self$worksheets[[i]]$sheet_data$row_attr$r
- # c("row_r", "c_r", "r", "v", "c_t", "c_s", "c_cm", "c_ph", "c_vm", "f", "f_attr", "is")
- cc <- cc[cc$row_r %in% cc_rows, ]
+ cc_rows <- self$worksheets[[i]]$sheet_data$row_attr$r
+ # c("row_r", "c_r", "r", "v", "c_t", "c_s", "c_cm", "c_ph", "c_vm", "f", "f_attr", "is")
+ cc <- cc[cc$row_r %in% cc_rows, ]
- self$worksheets[[i]]$sheet_data$cc <- cc[order(as.integer(cc[, "row_r"]), col2int(cc[, "c_r"])), ]
- } else {
- self$worksheets[[i]]$sheet_data$row_attr <- NULL
- self$worksheets[[i]]$sheet_data$cc <- NULL
+ self$worksheets[[i]]$sheet_data$cc <- cc[order(as.integer(cc[, "row_r"]), col2int(cc[, "c_r"])), ]
+ rm(cc)
+ } else {
+ self$worksheets[[i]]$sheet_data$row_attr <- NULL
+ self$worksheets[[i]]$sheet_data$cc <- NULL
+ }
}
- # create entire sheet prior to writing it
- sheet_xml <- write_worksheet(
- prior = prior,
- post = post,
- sheet_data = self$worksheets[[i]]$sheet_data
- )
ws_file <- file.path(xlworksheetsDir, sprintf("sheet%s.xml", i))
- write_xmlPtr(doc = sheet_xml, fl = ws_file)
- rm(sheet_xml)
+
+ if (use_pugixml_export) {
+
+ # create entire sheet prior to writing it
+ sheet_xml <- write_worksheet(
+ prior = prior,
+ post = post,
+ sheet_data = self$worksheets[[i]]$sheet_data
+ )
+ write_xmlPtr(doc = sheet_xml, fl = ws_file)
+
+ } else {
+
+ if (grepl("", prior))
+ prior <- substr(prior, 1, nchar(prior) - 13) # remove " "
+
+ write_worksheet_slim(
+ sheet_data = self$worksheets[[i]]$sheet_data,
+ prior = prior,
+ post = post,
+ fl = ws_file
+ )
+
+ }
## write worksheet rels
if (length(self$worksheets_rels[[i]])) {
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index afab64d3f..eee07da48 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -853,6 +853,19 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
+// write_worksheet_slim
+void write_worksheet_slim(Rcpp::Environment sheet_data, std::string prior, std::string post, std::string fl);
+RcppExport SEXP _openxlsx2_write_worksheet_slim(SEXP sheet_dataSEXP, SEXP priorSEXP, SEXP postSEXP, SEXP flSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< Rcpp::Environment >::type sheet_data(sheet_dataSEXP);
+ Rcpp::traits::input_parameter< std::string >::type prior(priorSEXP);
+ Rcpp::traits::input_parameter< std::string >::type post(postSEXP);
+ Rcpp::traits::input_parameter< std::string >::type fl(flSEXP);
+ write_worksheet_slim(sheet_data, prior, post, fl);
+ return R_NilValue;
+END_RCPP
+}
// write_worksheet
XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment& sheet_data);
RcppExport SEXP _openxlsx2_write_worksheet(SEXP priorSEXP, SEXP postSEXP, SEXP sheet_dataSEXP) {
@@ -1039,6 +1052,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_openxlsx2_read_colors", (DL_FUNC) &_openxlsx2_read_colors, 1},
{"_openxlsx2_write_colors", (DL_FUNC) &_openxlsx2_write_colors, 1},
{"_openxlsx2_set_sst", (DL_FUNC) &_openxlsx2_set_sst, 1},
+ {"_openxlsx2_write_worksheet_slim", (DL_FUNC) &_openxlsx2_write_worksheet_slim, 4},
{"_openxlsx2_write_worksheet", (DL_FUNC) &_openxlsx2_write_worksheet, 3},
{"_openxlsx2_write_xmlPtr", (DL_FUNC) &_openxlsx2_write_xmlPtr, 2},
{"_openxlsx2_styles_bin", (DL_FUNC) &_openxlsx2_styles_bin, 3},
diff --git a/src/write_file.cpp b/src/write_file.cpp
index 7509307b3..2f81d83ce 100644
--- a/src/write_file.cpp
+++ b/src/write_file.cpp
@@ -20,6 +20,192 @@ Rcpp::CharacterVector set_sst(Rcpp::CharacterVector sharedStrings) {
return sst;
}
+// write xml by streaming to files. this takes whatever input we provide and
+// dumps it into the file. no xml checking, no unicode checking
+void xml_sheet_data_slim(
+ Rcpp::DataFrame row_attr,
+ Rcpp::DataFrame cc,
+ std::string prior,
+ std::string post,
+ std::string fl
+) {
+
+ bool has_cm = cc.containsElementNamed("c_cm");
+ bool has_ph = cc.containsElementNamed("c_ph");
+ bool has_vm = cc.containsElementNamed("c_vm");
+
+ std::ofstream file(fl);
+
+ auto lastrow = 0; // integer value of the last row with column data
+ auto thisrow = 0; // integer value of the current row with column data
+ auto row_idx = 0; // the index of the row_attr file. this is != rowid
+ auto rowid = 0; // integer value of the r field in row_attr
+
+ std::string xml_preserver = " ";
+
+ file << "\n";
+ file << prior;
+
+ Rcpp::CharacterVector cc_c_cm, cc_c_ph, cc_c_vm;
+
+ if (cc.nrow() && cc.ncol()) {
+ // we cannot access rows directly in the dataframe.
+ // Have to extract the columns and use these
+ Rcpp::CharacterVector cc_row_r = cc["row_r"]; // 1
+ Rcpp::CharacterVector cc_r = cc["r"]; // A1
+ Rcpp::CharacterVector cc_v = cc["v"];
+ Rcpp::CharacterVector cc_c_t = cc["c_t"];
+ Rcpp::CharacterVector cc_c_s = cc["c_s"];
+ if (has_cm) cc_c_cm = cc["c_cm"];
+ if (has_ph) cc_c_ph = cc["c_ph"];
+ if (has_vm) cc_c_vm = cc["c_vm"];
+ Rcpp::CharacterVector cc_f = cc["f"];
+ Rcpp::CharacterVector cc_f_attr = cc["f_attr"];
+ Rcpp::CharacterVector cc_is = cc["is"];
+
+ Rcpp::CharacterVector row_r = row_attr["r"];
+
+
+ file << "";
+ for (auto i = 0; i < cc.nrow(); ++i) {
+
+ thisrow = std::stoi(Rcpp::as(cc_row_r[i]));
+
+ if (lastrow < thisrow) {
+
+ // there might be entirely empty rows in between. this is the case for
+ // loadExample. We check the rowid and write the line and skip until we
+ // have every row and only then continue writing the column
+ while (rowid < thisrow) {
+
+ rowid = std::stoi(Rcpp::as(
+ row_r[row_idx]
+ ));
+
+ if (row_idx) file << "";
+ file << "(row_attr[j])[row_idx];
+
+ if (cv_s[0] != "") {
+ const std::string val_strl = Rcpp::as(cv_s);
+ file << " " << attrnams[j] << "=\"" << val_strl.c_str() << "\"";
+ }
+ }
+ file << ">"; // end
+
+ // read the next row_idx when visiting again
+ ++row_idx;
+ }
+ }
+
+ // create node
+ file << "
+ file << " r" << "=\"" << to_string(cc_r[i]).c_str() << "\"";
+
+ if (!to_string(cc_c_s[i]).empty())
+ file << " s" << "=\"" << to_string(cc_c_s[i]).c_str() << "\"";
+
+ // assign type if not aka numeric
+ if (!to_string(cc_c_t[i]).empty())
+ file << " t" << "=\"" << to_string(cc_c_t[i]).c_str() << "\"";
+
+ // CellMetaIndex: suppress curly brackets in spreadsheet software
+ if (has_cm && !to_string(cc_c_cm[i]).empty())
+ file << " cm" << "=\"" << to_string(cc_c_cm[i]).c_str() << "\"";
+
+ // phonetics spelling
+ if (has_ph && !to_string(cc_c_ph[i]).empty())
+ file << " ph" << "=\"" << to_string(cc_c_ph[i]).c_str() << "\"";
+
+ // suppress curly brackets in spreadsheet software
+ if (has_vm && !to_string(cc_c_vm[i]).empty())
+ file << " vm" << "=\"" << to_string(cc_c_vm[i]).c_str() << "\"";
+
+ file << ">"; // end
+
+ bool f_si = false;
+
+ // ...
+ // f node: formula to be evaluated
+ if (!to_string(cc_f[i]).empty() || !to_string(cc_f_attr[i]).empty()) {
+ file << "";
+
+ file << to_string(cc_f[i]).c_str();
+
+ file << "";
+ }
+
+ // v node: value stored from evaluated formula
+ if (!to_string(cc_v[i]).empty()) {
+ if (!f_si & (to_string(cc_v[i]).compare(xml_preserver.c_str()) == 0)) {
+ // this looks strange
+ file << "";
+ file << " ";
+ file << "";
+ } else {
+ file << "" << to_string(cc_v[i]).c_str() << "";
+ }
+ }
+
+ // ...
+ if (to_string(cc_c_t[i]).compare("inlineStr") == 0) {
+ if (!to_string(cc_is[i]).empty()) {
+ file << to_string(cc_is[i]).c_str();
+ }
+ }
+
+ file << "";
+
+ // update lastrow
+ lastrow = thisrow;
+ }
+
+ file << "
";
+ file << "";
+ } else {
+ file << "";
+ }
+
+
+ file << post;
+ file << "";
+
+ file.close();
+
+}
+
+// export worksheet without pugixml
+// this should be way quicker, uses far less memory, but also skips all of the checks pugi does
+//
+// [[Rcpp::export]]
+void write_worksheet_slim(
+ Rcpp::Environment sheet_data,
+ std::string prior,
+ std::string post,
+ std::string fl
+){
+ // sheet_data will be in order, just need to check for row_heights
+ // CharacterVector cell_col = int_to_col(sheet_data.field("cols"));
+ Rcpp::DataFrame row_attr = Rcpp::as(sheet_data["row_attr"]);
+ Rcpp::DataFrame cc = Rcpp::as(sheet_data["cc"]);
+
+ xml_sheet_data_slim(row_attr, cc, prior, post, fl);
+}
+
// creates an xml row
// data in xml is ordered row wise. therefore we need the row attributes and
// the column data used in this row. This function uses both to create a single
diff --git a/tests/testthat/test-write.R b/tests/testthat/test-write.R
index e57d13d4a..8c94cc34e 100644
--- a/tests/testthat/test-write.R
+++ b/tests/testthat/test-write.R
@@ -1470,5 +1470,27 @@ test_that("guarding against overwriting shared formula reference works", {
exp <- c("1", "2", "B1 + 1", "C1 + 1")
got <- unname(unlist(wb$to_df(show_formula = TRUE, col_names = FALSE)))
expect_equal(exp, got)
+})
+
+test_that("writing without pugixml works", {
+
+ temp <- temp_xlsx()
+ expect_silent(write_xlsx(x = mtcars, file = temp))
+ expect_silent(wb <- wb_load(temp))
+
+ temp <- temp_xlsx()
+ options("openxlsx2.export_with_pugi" = FALSE)
+ expect_silent(write_xlsx(x = mtcars, file = temp))
+ expect_silent(wb <- wb_load(temp))
+
+ temp <- temp_xlsx()
+ options("openxlsx2.export_with_pugi" = TRUE)
+ expect_silent(write_xlsx(x = mtcars, file = temp))
+ expect_silent(wb <- wb_load(temp))
+
+ temp <- temp_xlsx()
+ options("openxlsx2.export_with_pugi" = NULL)
+ expect_silent(write_xlsx(x = mtcars, file = temp))
+ expect_silent(wb <- wb_load(temp))
})