Skip to content

Commit

Permalink
feat: v0.7.3, refined parsing of Shimadzu PDA data stream
Browse files Browse the repository at this point in the history
- Updated `read_shimadzu_lcd` to infer retention times in Shimadzu 3D Data from Max Plot stream since it is always (?) present.
- Updated `read_shimadzu_lcd` to skip parsing of metadata from 3D Data Item when it is not present.
- Added test for Chromeleon 3D ascii data
  • Loading branch information
ethanbass committed Jan 5, 2025
1 parent f70a469 commit fefefb0
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 45 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: chromConverter
Title: Chromatographic File Converter
Version: 0.7.2
Version: 0.7.3
Authors@R: c(
person(given = "Ethan", family = "Bass", email = "ethanbass@gmail.com",
role = c("aut", "cre"),
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## chromConverter 0.7.3

* Updated `read_shimadzu_lcd` to infer retention times in Shimadzu 3D Data from `Max Plot` stream since it is always (?) present.
* Updated `read_shimadzu_lcd` to skip parsing of metadata from 3D Data Item when it is not present.
* Updated `read_shimadzu_lcd` to include `Max Plot` stream when parsing 2D chromatograms.

## chromConverter 0.7.2

* Added preliminary support for extraction of peak tables from 'Shimadzu' `.lcd` files.
Expand Down
2 changes: 1 addition & 1 deletion R/olefile_utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ check_streams <- function(path, what = c("pda", "chroms", "tic", "peaks", ""),
}
} else {
streams <- ole$listdir()
what <- switch(what, "chroms" = "Chromatogram Ch",
what <- switch(what, "chroms" = "Chromatogram Ch|Max Plot",
"tic" = "Centroid SumTIC",
"peaks" = "Peak Table|PT")
selected_streams <- streams[grep(what, streams)]
Expand Down
86 changes: 60 additions & 26 deletions R/read_shimadzu_lcd.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,15 +177,20 @@ read_sz_lcd_3d <- function(path, format_out = "matrix",
dat <- read_sz_pda(path, n_lambdas = n_lambdas)
colnames(dat) <- lambdas

DI <- read_sz_3DDI(path)
times <- seq(DI$DLT, DI$AT, by = DI$Rate)
if (length(times) != nrow(dat)){
times <- seq(DI$DLT, DI$AT, length.out = nrow(dat))
warning("Length of the inferred time axis does not match the number of rows
data_item_exists <- check_stream(path, c('PDA 3D Raw Data', '3D Data Item'))
if (data_item_exists){
DI <- read_sz_3DDI(path)
times <- seq(DI$DLT, DI$AT, by = DI$Rate)
if (length(times) != nrow(dat)){
times <- seq(DI$DLT, DI$AT, length.out = nrow(dat))
warning("Length of the inferred time axis does not match the number of rows
in the data.")
}
if (inherits(times, "numeric")){
rownames(dat) <- times
}
if (inherits(times, "numeric")){
rownames(dat) <- times
}
} else{
DI <- data.frame(DETN=NA, DSCN=NA, ADN=NA, detector.unit=NA)
}
if (data_format == "long"){
dat <- reshape_chrom(dat, data_format = "wide")
Expand Down Expand Up @@ -268,8 +273,8 @@ read_sz_lcd_2d <- function(path, format_out = "data.frame",

dat <- lapply(existing_streams, function(stream){
dat <- read_sz_chrom(path, stream = stream)

idx <- as.numeric(gsub("\\D", "", stream[2]))
idx <- ifelse(stream[2] == "Max Plot", "PDA",
as.numeric(gsub("\\D", "", stream[2])))
data_item_exists <- check_stream(path,c('LSS Data Processing', '2D Data Item'))
if (data_item_exists){
DI <- read_sz_2DDI(path, idx = idx)
Expand All @@ -279,8 +284,8 @@ read_sz_lcd_2d <- function(path, format_out = "data.frame",
dat <- dat*DI$detector.vf
}
} else{
DI <- data.frame(DETN=sapply(existing_streams,"[",2),
DSCN=NA, ADN=NA, detector.unit=NA)
DI <- data.frame(DETN = sapply(existing_streams,"[",2),
DSCN = NA, ADN = NA, detector.unit = NA)
times <- rownames(dat)
}
if (data_format == "long"){
Expand Down Expand Up @@ -449,21 +454,44 @@ get_sz_times <- function(sz_method, what = c("pda", "chromatogram"), nval){
} else NA
}

#' Get number of rows from LCD 3D Data item
#' Get number of rows and interval from 'PDA 3D Raw Data/Max Plot'
#' @author Ethan Bass
#' @noRd

get_shimadzu_rows <- function(path){
metadata_path <- export_stream(path, stream = c("PDA 3D Raw Data", "3D Data Item"))
if (is.na(metadata_path)){
warning("3D Data Item stream could not be found -- unable to infer number of rows in stream.")
return(NA)
} else {
meta <- xml2::read_xml(metadata_path)
cn_node <- xml2::xml_find_first(meta, "//CN")
as.numeric(xml2::xml_text(cn_node))
}
get_shimadzu_axis <- function(path){
maxplot_path <- export_stream(path, stream = c("PDA 3D Raw Data", "Max Plot"))
if (is.na(maxplot_path)){
warning("Unable to infer number of rows in stream.")
return(NA)
} else {
f <- file(maxplot_path, "rb")
on.exit(close(f))
seek(f,4)
interval <- readBin(f, what = "integer", n = 1, size = 4, endian = "little")
nrows <- readBin(f, what = "integer", n = 1, size = 4, endian = "little")
list(interval=interval, nrows=nrows)
}
}
# get_shimadzu_rows <- function(path){
# metadata_path <- export_stream(path, stream = c("PDA 3D Raw Data", "3D Data Item"))
# if (is.na(metadata_path)){
# maxplot_path <- export_stream(path, stream = c("PDA 3D Raw Data", "Max Plot"))
# if (is.na(maxplot_path)){
# warning("Unable to infer number of rows in stream.")
# return(NA)
# } else {
# f <- file(maxplot_path, "rb")
# on.exit(close(f))
# seek(f,4)
# interval <- readBin(f, what = "integer", n = 1, size = 4, endian = "little")
# n_rows <- readBin(f, what = "integer", n = 1, size = 4, endian = "little")
# list(interval=interval, nrows=nrows)
# }
# } else {
# meta <- xml2::read_xml(metadata_path)
# cn_node <- xml2::xml_find_first(meta, "//CN")
# as.numeric(xml2::xml_text(cn_node))
# }
# }

#' Read 'Shimadzu' LCD 3D Raw Data
#' @author Ethan Bass
Expand All @@ -482,8 +510,8 @@ read_sz_pda <- function(path, n_lambdas = NULL){
seek(f, 0, "start")
seek(f, 0, "start")

nrows <- get_shimadzu_rows(path)
nrows <- ifelse(is.na(nrows), fsize/(n_lambdas * 1.5), nrows)
axis <- get_shimadzu_axis(path)
nrows <- ifelse(is.na(axis$nrows), fsize/(n_lambdas * 1.5), axis$nrows)

mat <- matrix(NA, nrow = nrows, ncol = n_lambdas)
i <- 1
Expand All @@ -494,6 +522,8 @@ read_sz_pda <- function(path, n_lambdas = NULL){
if (any(is.na(mat[,1]))){
mat <- mat[-which(is.na(mat[,1])),]
}
times <- seq(from = 0, by = axis$interval, length.out = nrow(mat))/60000
rownames(mat) <- times
mat
}

Expand Down Expand Up @@ -764,6 +794,10 @@ read_sz_2DDI <- function(path, read_file = TRUE, idx = 1){
} else{
doc <- path
}
if (idx == "PDA"){
det <- xml2::xml_text(xml2::xml_find_all(doc, "//DN"))
idx <- which(det == "PDA")
}

nodes <- xml2::xml_child(doc, search = idx) |> xml2::xml_children()
ddi_idx <- which(xml2::xml_name(nodes) == "DDI")
Expand Down
2 changes: 1 addition & 1 deletion inst/CITATION
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ bibentry(
title = "chromConverter: Chromatographic File Converter",
author = "Ethan Bass",
year = "2024",
version = "version 0.7.2",
version = "version 0.7.3",
doi = "10.5281/zenodo.6792521",
url = "https://ethanbass.github.io/chromConverter/",
textVersion = paste("Bass, E. (2024).",
Expand Down
63 changes: 47 additions & 16 deletions tests/testthat/test-extra.R
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,35 @@ test_that("read_chroms can read 'Chromeleon' period-separated files", {
expect_equal(x[,1], x1[,2], ignore_attr = TRUE)
})

test_that("read_chroms can read 'Chromeleon' 3D data files", {
skip_on_cran()
skip_if_not_installed("chromConverterExtraTests")

path <- system.file("chromeleon_3D.txt",
package = "chromConverterExtraTests")
skip_if_not(file.exists(path))

x <- read_chroms(path, format_in = "chromeleon_uv", progress_bar = FALSE)[[1]]
expect_equal(class(x)[1], "matrix")
expect_equal(dim(x), c(6000, 301))
expect_equal(attr(x, "parser"), "chromconverter")
expect_equal(attr(x, "data_format"), "wide")
expect_equal(attr(x, "detector"), "UV")
expect_equal(attr(x, "sample_name"), "MeOH_Blank")
expect_equal(attr(x, "vial"), "GA1")
expect_equal(attr(x, "sample_injection_volume"), "1.000")
expect_equal(attr(x, "time_unit"), "Minutes")
expect_equal(attr(x, "detector_y_unit"), "mAU")

x1 <- read_chroms(path, format_in = "chromeleon", progress_bar = FALSE,
format_out = "data.frame", data_format = "long")[[1]]
expect_s3_class(x1[1], "data.frame")
expect_equal(colnames(x1), c("rt", "lambda", "intensity"))
expect_equal(as.numeric(rownames(x)), x1[x1$lambda==200,1])
expect_equal(nrow(x1), ncol(x)*nrow(x))
expect_equal(attr(x1, "data_format"), "long")
})

test_that("read_peaklist can read `Shimadzu` ASCII (PDA) files", {
skip_on_cran()
skip_if_missing_dependencies()
Expand Down Expand Up @@ -563,6 +592,7 @@ test_that("read_chroms can read 'Shimadzu' PDA files (ASCII and LCD)", {

x <- read_chroms(path_ascii, format_in = "shimadzu_dad",
progress_bar = FALSE)[[1]]

expect_equal(class(x)[1], "matrix")
expect_equal(dim(x), c(4689, 328))
expect_equal(attr(x, "parser"), "chromconverter")
Expand All @@ -571,11 +601,12 @@ test_that("read_chroms can read 'Shimadzu' PDA files (ASCII and LCD)", {
x1 <- read_chroms(path_ascii, format_in = "shimadzu_dad",
progress_bar = FALSE, data_format = "long",
format_out = "data.frame")[[1]]

expect_s3_class(x1[1], "data.frame")
expect_equal(dim(x1), c(4689 * 328, 3))


x2 <- read_chroms(path_lcd, progress_bar = FALSE)[[1]]

expect_equal(dim(x2), c(4689, 328))
expect_equal(x, x2, ignore_attr = TRUE)

Expand Down Expand Up @@ -643,32 +674,32 @@ test_that("read_chroms can read 2D chromatograms from 'Shimadzu' LCD files", {
x1 <- read_chroms(path_lcd, format_in = "shimadzu_lcd", what = "chroms",
progress_bar = FALSE)[[1]]

expect_equal(class(x1)[1], "matrix")
expect_equal(dim(x1), c(30000, 1))
expect_equal(x[-1,1], x1[,1], ignore_attr = TRUE)
all.equal(as.numeric(rownames(x)[-1]), as.numeric(rownames(x1)),
expect_equal(class(x1$AD2)[1], "matrix")
expect_equal(dim(x1$AD2), c(30000, 1))
expect_equal(x[-1,1], x1$AD2[,1], ignore_attr = TRUE)
all.equal(as.numeric(rownames(x)[-1]), as.numeric(rownames(x1$AD2)),
tolerance = .0001)

# unscaled
x2 <- read_chroms(path_lcd, format_in = "shimadzu_lcd", what = "chroms",
progress_bar = FALSE, scale = FALSE)[[1]]
all.equal(x[-1, 1], x2[, 1] * attr(x2, "intensity_multiplier"),
all.equal(x[-1, 1], x2$AD2[, 1] * attr(x2$AD2, "intensity_multiplier"),
check.attributes = FALSE)

# check metadata equivalence
expect_equal(attr(x, "software_version"), attr(x1, "software_version"))
expect_equal(attr(x, "method"), attr(x1, "method"))
expect_equal(attr(x, "batch"), attr(x1, "batch"))
expect_equal(attr(x, "operator"), attr(x1, "operator"))
expect_equal(attr(x, "sample_name"), attr(x1, "sample_name"))
expect_equal(attr(x, "sample_id"), attr(x1, "sample_id"))
expect_equal(attr(x, "software_version"), attr(x1$AD2, "software_version"))
expect_equal(attr(x, "method"), attr(x1$AD2, "method"))
expect_equal(attr(x, "batch"), attr(x1$AD2, "batch"))
expect_equal(attr(x, "operator"), attr(x1$AD2, "operator"))
expect_equal(attr(x, "sample_name"), attr(x1$AD2, "sample_name"))
expect_equal(attr(x, "sample_id"), attr(x1$AD2, "sample_id"))
expect_equal(attr(x, "sample_injection_volume"),
attr(x1, "sample_injection_volume"))
attr(x1$AD2, "sample_injection_volume"))
expect_equal(as.numeric(attr(x, "time_range")),
round(attr(x1, "time_range"), 3))
expect_equal(attr(x, "detector_y_unit"), attr(x1, "detector_y_unit"))
round(attr(x1$AD2, "time_range"), 3))
expect_equal(attr(x, "detector_y_unit"), attr(x1$AD2, "detector_y_unit"))
expect_equal(attr(x, "intensity_multiplier"),
attr(x1, "intensity_multiplier"))
attr(x1$AD2, "intensity_multiplier"))
})

test_that("read_chroms can read 'Shimadzu' PDA comma-separated file", {
Expand Down

0 comments on commit fefefb0

Please sign in to comment.