Skip to content

Commit

Permalink
[misc] attempt to reduce the number of variables in cc (#1253)
Browse files Browse the repository at this point in the history
* [misc] attempt to reduce the number of variables in `cc`

* [misc] attempt to reduce the number of variables in `cc`

* [tests] fix the broken test
  • Loading branch information
JanMarvin authored Jan 29, 2025
1 parent 925a58b commit bca718e
Show file tree
Hide file tree
Showing 14 changed files with 254 additions and 124 deletions.
10 changes: 0 additions & 10 deletions R/class-sheet-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,6 @@ wb_sheet_data <- function() {

# helpers -----------------------------------------------------------------

# Consider making some helpers for the cc stuff.

# Build an empty `cc` (cell content) data frame with `n` rows.
#
# The column set is fixed: cell reference (`r`), its row and column parts
# (`row_r`, `c_r`), the cell attributes (`c_s`, `c_t`, `c_cm`, `c_ph`,
# `c_vm`), the value, formula, formula attributes and inline string (`v`,
# `f`, `f_attr`, `is`), and `typ`. The frame itself is created by
# `create_char_dataframe()`.
empty_sheet_data_cc <- function(n) {
  cc_columns <- c(
    "r", "row_r", "c_r",
    "c_s", "c_t", "c_cm", "c_ph", "c_vm",
    "v", "f", "f_attr", "is", "typ"
  )
  create_char_dataframe(colnames = cc_columns, n = n)
}

empty_row_attr <- function(n) {
create_char_dataframe(
colnames = c("collapsed", "customFormat", "customHeight", "x14ac:dyDescent",
Expand Down
2 changes: 1 addition & 1 deletion R/class-workbook.R
Original file line number Diff line number Diff line change
Expand Up @@ -4042,7 +4042,7 @@ wbWorkbook <- R6::R6Class(

if (as_ref) {
from_sheet_name <- self$get_sheet_names(escape = TRUE)[[from_sheet]]
to_cc[c("c_t", "c_cm", "c_ph", "c_vm", "v", "f", "f_attr", "is")] <- ""
to_cc[names(to_cc) %in% c("c_t", "c_cm", "c_ph", "c_vm", "v", "f", "f_attr", "is")] <- ""
to_cc[c("f")] <- paste0(shQuote(from_sheet_name, type = "sh"), "!", from_dims)
}

Expand Down
4 changes: 2 additions & 2 deletions R/class-worksheet.R
Original file line number Diff line number Diff line change
Expand Up @@ -638,10 +638,10 @@ wbWorksheet <- R6::R6Class(

if (characters)
cc[sel & cc$c_t %in% c("inlineStr", "s", "str"),
c("c_t", "c_ph", "v", "f", "f_attr", "is")] <- ""
names(cc) %in% c("c_t", "c_ph", "v", "f", "f_attr", "is")] <- ""

if (styles)
cc[sel, c("c_s", "c_cm", "c_vm")] <- ""
cc[sel, names(cc) %in% c("c_s", "c_cm", "c_vm")] <- ""

self$sheet_data$cc <- cc

Expand Down
29 changes: 17 additions & 12 deletions R/write.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,11 @@ inner_update <- function(
na.strings <- NULL
}

if (removeCellStyle) {
cell_style <- "c_s"
} else {
cell_style <- NULL
replacement <- names(cc)
if (!removeCellStyle) {
replacement <- replacement[-which(replacement == "c_s")]
}

replacement <- c("r", cell_style, "c_t", "c_cm", "c_ph", "c_vm", "v",
"f", "f_attr", "is", "typ")

sel <- match(x$r, cc$r)

# to avoid bricking the worksheet, we make sure that we do not overwrite the
Expand Down Expand Up @@ -148,9 +144,10 @@ inner_update <- function(
initialize_cell <- function(wb, sheet, new_cells) {

sheet_id <- wb$validate_sheet(sheet)
nms <- names(wb$worksheets[[sheet_id]]$sheet_data$cc)

# create artificial cc for the missing cells
x <- empty_sheet_data_cc(n = length(new_cells))
x <- create_char_dataframe(n = length(new_cells), colnames = nms)
x$r <- new_cells
x$row_r <- gsub("[[:upper:]]", "", new_cells)
x$c_r <- gsub("[[:digit:]]", "", new_cells)
Expand Down Expand Up @@ -417,8 +414,15 @@ write_data2 <- function(
rows_attr$r <- rownames(rtyp)

# original cc data frame
cc <- empty_sheet_data_cc(n = nrow(data) * ncol(data))

has_cm <- if (any(dc == openxlsx2_celltype[["cm_formula"]])) "c_cm" else NULL
nms <- c(
"r", "row_r", "c_r", "c_s", "c_t", has_cm,
"v", "f", "f_attr", "is", "typ"
)
cc <- create_char_dataframe(
colnames = nms,
n = nrow(data) * ncol(data)
)

sel <- which(dc == openxlsx2_celltype[["logical"]])
for (i in sel) {
Expand Down Expand Up @@ -524,13 +528,14 @@ write_data2 <- function(

int_si <- max(int_si, -1L) + 1L

cc$f_attr <- sprintf("t=\"%s\"", "shared")
cc[["f_attr"]] <- sprintf("t=\"%s\"", "shared")
cc[1, "f_attr"] <- paste(cc[1, "f_attr"], sprintf("ref=\"%s\"", dims))
cc[["f_attr"]] <- paste(cc[["f_attr"]], sprintf("si=\"%s\"", int_si))
cc[2:nrow(cc), "f"] <- ""
cc$f_attr <- paste(cc$f_attr, sprintf("si=\"%s\"", int_si))
}

if (is.null(wb$worksheets[[sheetno]]$sheet_data$cc)) {
# message("write_cell()")

wb$worksheets[[sheetno]]$dimension <- paste0("<dimension ref=\"", dims, "\"/>")

Expand Down
6 changes: 5 additions & 1 deletion src/helper_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,10 +397,14 @@ void wide_to_long(

int32_t in_string_nums = string_nums;

bool has_cm = zz.containsElementNamed("c_cm");

// pointer magic. even though these are extracted, they just point to the
// memory in the data frame
Rcpp::CharacterVector zz_c_cm;

Rcpp::CharacterVector zz_row_r = Rcpp::as<Rcpp::CharacterVector>(zz["row_r"]);
Rcpp::CharacterVector zz_c_cm = Rcpp::as<Rcpp::CharacterVector>(zz["c_cm"]);
if (has_cm) zz_c_cm = Rcpp::as<Rcpp::CharacterVector>(zz["c_cm"]);
Rcpp::CharacterVector zz_c_r = Rcpp::as<Rcpp::CharacterVector>(zz["c_r"]);
Rcpp::CharacterVector zz_v = Rcpp::as<Rcpp::CharacterVector>(zz["v"]);
Rcpp::CharacterVector zz_c_t = Rcpp::as<Rcpp::CharacterVector>(zz["c_t"]);
Expand Down
21 changes: 17 additions & 4 deletions src/load_workbook.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ inline Rcpp::DataFrame row_to_df(XPtrXML doc) {
void loadvals(Rcpp::Environment sheet_data, XPtrXML doc) {
auto ws = doc->child("worksheet").child("sheetData");

bool has_cm = false, has_ph = false, has_vm = false;

// character
Rcpp::DataFrame row_attributes;

Expand Down Expand Up @@ -228,9 +230,18 @@ void loadvals(Rcpp::Environment sheet_data, XPtrXML doc) {

if (attr_name == s_str) single_xml_col.c_s = buffer;
if (attr_name == t_str) single_xml_col.c_t = buffer;
if (attr_name == cm_str) single_xml_col.c_cm = buffer;
if (attr_name == ph_str) single_xml_col.c_ph = buffer;
if (attr_name == vm_str) single_xml_col.c_vm = buffer;
if (attr_name == cm_str) {
has_cm = true;
single_xml_col.c_cm = buffer;
}
if (attr_name == ph_str) {
has_ph = true;
single_xml_col.c_ph = buffer;
}
if (attr_name == vm_str) {
has_vm = true;
single_xml_col.c_vm = buffer;
}
}

// some files have no colnames. in this case we need to add c_r and row_r
Expand Down Expand Up @@ -283,6 +294,8 @@ void loadvals(Rcpp::Environment sheet_data, XPtrXML doc) {
++itr_rows;
}

// Rcpp::Rcout << has_cm << ": " << has_ph << ": " << has_vm << std::endl;

sheet_data["row_attr"] = row_attributes;
sheet_data["cc"] = Rcpp::wrap(xml_cols);
sheet_data["cc"] = xml_cols_to_df(xml_cols, has_cm, has_ph, has_vm);
}
181 changes: 181 additions & 0 deletions src/openxlsx2.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,184 @@ static inline bool validate_dims(const std::string& input) {

return has_col && has_row;
}

// Convert the parsed worksheet cells into the `cc` data frame.
//
// @param x      one xml_col entry per cell
// @param has_cm include the optional "c_cm" column
// @param has_ph include the optional "c_ph" column
// @param has_vm include the optional "c_vm" column
// @return a data frame with the character columns r, row_r, c_r, c_s, c_t,
//         [c_cm], [c_ph], [c_vm], v, f, f_attr, is (bracketed columns only
//         when the matching flag is set), one row per element of `x`.
//
// The columns are collected in a single list instead of spelling out one
// DataFrame::create() call for each of the eight flag combinations, so adding
// another optional column only needs one more add_col() line.
inline SEXP xml_cols_to_df(const std::vector<xml_col>& x, bool has_cm, bool has_ph, bool has_vm) {
  R_xlen_t n = static_cast<R_xlen_t>(x.size());

  // Vector structure identical to xml_col from openxlsx2_types.h
  Rcpp::CharacterVector r(Rcpp::no_init(n));       // cell name: A1, A2 ...
  Rcpp::CharacterVector row_r(Rcpp::no_init(n));   // row name: 1, 2, ..., 9999

  Rcpp::CharacterVector c_r(Rcpp::no_init(n));     // col name: A, B, ..., ZZ
  Rcpp::CharacterVector c_s(Rcpp::no_init(n));     // cell style
  Rcpp::CharacterVector c_t(Rcpp::no_init(n));     // cell type

  // optional attribute columns: only allocated when requested
  Rcpp::CharacterVector c_cm, c_ph, c_vm;
  if (has_cm) c_cm = Rcpp::CharacterVector(Rcpp::no_init(n));
  if (has_ph) c_ph = Rcpp::CharacterVector(Rcpp::no_init(n));
  if (has_vm) c_vm = Rcpp::CharacterVector(Rcpp::no_init(n));

  Rcpp::CharacterVector v(Rcpp::no_init(n));       // <v> tag
  Rcpp::CharacterVector f(Rcpp::no_init(n));       // <f> tag
  Rcpp::CharacterVector f_attr(Rcpp::no_init(n));  // <f /> attributes
  Rcpp::CharacterVector is(Rcpp::no_init(n));      // <is> tag

  // struct to vector
  // We have to convert utf8 inputs via Rcpp::String for non unicode R sessions
  // Ideally there would be a function that calls Rcpp::String only if needed
  for (R_xlen_t i = 0; i < n; ++i) {
    size_t ii = static_cast<size_t>(i);
    if (!x[ii].r.empty())      r[i]      = std::string(x[ii].r);
    if (!x[ii].row_r.empty())  row_r[i]  = std::string(x[ii].row_r);
    if (!x[ii].c_r.empty())    c_r[i]    = std::string(x[ii].c_r);
    if (!x[ii].c_s.empty())    c_s[i]    = std::string(x[ii].c_s);
    if (!x[ii].c_t.empty())    c_t[i]    = std::string(x[ii].c_t);
    if (has_cm && !x[ii].c_cm.empty()) c_cm[i] = std::string(x[ii].c_cm);
    if (has_ph && !x[ii].c_ph.empty()) c_ph[i] = Rcpp::String(x[ii].c_ph);
    if (has_vm && !x[ii].c_vm.empty()) c_vm[i] = std::string(x[ii].c_vm);
    if (!x[ii].v.empty()) {  // can only be utf8 if c_t = "str"
      if (x[ii].c_t.empty() && x[ii].f_attr.empty())
        v[i] = std::string(x[ii].v);
      else
        v[i] = Rcpp::String(x[ii].v);
    }
    if (!x[ii].f.empty())      f[i]      = Rcpp::String(x[ii].f);
    if (!x[ii].f_attr.empty()) f_attr[i] = std::string(x[ii].f_attr);
    if (!x[ii].is.empty())     is[i]     = Rcpp::String(x[ii].is);
  }

  // nine mandatory columns plus one per requested optional attribute
  const R_xlen_t n_cols = 9 + (has_cm ? 1 : 0) + (has_ph ? 1 : 0) + (has_vm ? 1 : 0);

  Rcpp::List cols(n_cols);
  Rcpp::CharacterVector col_names(n_cols);
  R_xlen_t k = 0;

  // append a named column at the next free position
  auto add_col = [&](const std::string& name, const Rcpp::CharacterVector& col) {
    cols[k]      = col;
    col_names[k] = name;
    ++k;
  };

  // the column order must stay: r, row_r, c_r, c_s, c_t, [c_cm], [c_ph],
  // [c_vm], v, f, f_attr, is
  add_col("r", r);
  add_col("row_r", row_r);
  add_col("c_r", c_r);
  add_col("c_s", c_s);
  add_col("c_t", c_t);
  if (has_cm) add_col("c_cm", c_cm);
  if (has_ph) add_col("c_ph", c_ph);
  if (has_vm) add_col("c_vm", c_vm);
  add_col("v", v);
  add_col("f", f);
  add_col("f_attr", f_attr);
  add_col("is", is);

  cols.attr("names") = col_names;

  // Assign and return a dataframe. Rcpp::DataFrame::create() with a
  // stringsAsFactors element hands the list to R's as.data.frame(); calling
  // it directly gives the same result for a dynamically sized column list.
  Rcpp::Function as_data_frame("as.data.frame");
  return as_data_frame(cols, Rcpp::Named("stringsAsFactors") = false);
}
Loading

0 comments on commit bca718e

Please sign in to comment.