Skip to content

Commit

Permalink
Update Spectrum struct
Browse files Browse the repository at this point in the history
- Add observed nucleus, spectrometer frequency and reference compound fields. These are not needed for initialization, so they will take on default values at first.
- Add methods to access and mutate these values
- Observed nucleus and spectrometer frequency have no effect on the data and only serve as additional information
- Reference compound is used to shift the x values
- Update serialization, and python bindings and type hints
- Add parsing/mutation of these values to Bruker and JCAMP-DX implementations with tests
- Update Nucleus, ReferencingMethod and ReferenceCompound
- Update tests
  • Loading branch information
SombkeMaximilian committed Mar 5, 2025
1 parent cbe1667 commit c84cee4
Show file tree
Hide file tree
Showing 12 changed files with 572 additions and 115 deletions.
3 changes: 3 additions & 0 deletions metabodecon-python/metabodecon/_metabodecon.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ class Spectrum:
chemical_shifts: np.ndarray
intensities: np.ndarray
signal_boundaries: tuple[float, float]
nucleus: str
frequency: float
reference_compound: dict

def __init__(self, chemical_shifts: np.ndarray, intensities: np.ndarray,
signal_boundaries: tuple[float, float]) -> None:
Expand Down
66 changes: 66 additions & 0 deletions metabodecon-python/src/bindings/spectrum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::error::SerializationError;
use metabodecon::spectrum;
use numpy::PyArray1;
use pyo3::prelude::*;
use pyo3::types::PyDict;

#[pyclass]
#[derive(Clone, Debug)]
Expand Down Expand Up @@ -88,6 +89,36 @@ impl Spectrum {
self.inner.signal_boundaries()
}

#[getter]
pub fn nucleus(&self) -> String {
self.inner.nucleus().to_string()
}

#[getter]
pub fn frequency(&self) -> f64 {
self.inner.frequency()
}

#[getter]
pub fn reference_compound<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyDict>> {
let dict = PyDict::new(py);
let reference = self.inner.reference_compound();
dict.set_item("chemical_shift", reference.chemical_shift())?;
dict.set_item("index", reference.index())?;
match reference.name() {
Some(name) => dict.set_item("name", name)?,
None => dict.set_item("name", "unknown")?,
};
match reference.referencing_method() {
Some(referencing_method) => {
dict.set_item("referencing_method", referencing_method.to_string())?
}
None => dict.set_item("referencing_method", "unknown")?,
};

Ok(dict)
}

#[setter]
pub fn set_signal_boundaries(&mut self, signal_boundaries: (f64, f64)) -> PyResult<()> {
match self
Expand All @@ -99,6 +130,41 @@ impl Spectrum {
}
}

#[setter]
pub fn set_nucleus(&mut self, nucleus: &str) {
self.inner
.set_nucleus(std::str::FromStr::from_str(nucleus).unwrap());
}

#[setter]
pub fn set_frequency(&mut self, frequency: f64) {
self.inner.set_frequency(frequency);
}

#[setter]
pub fn set_reference_compound(&mut self, reference: Bound<'_, PyDict>) -> PyResult<()> {
let reference = reference.as_any();
let chemical_shift = reference
.get_item("chemical_shift")?
.extract::<f64>()?;
let index = reference.get_item("index")?.extract::<usize>()?;
let name = match reference.get_item("name") {
Ok(name) => name.extract::<String>().ok(),
Err(_) => None,
};
let method = match reference.get_item("referencing_method") {
Ok(method) => method
.extract::<String>()
.ok()
.map(|method| std::str::FromStr::from_str(&method).unwrap()),
Err(_) => None,
};
let reference = spectrum::meta::ReferenceCompound::new(chemical_shift, index, name, method);
self.inner.set_reference_compound(reference);

Ok(())
}

pub fn write_json(&self, path: &str) -> PyResult<()> {
let serialized = match serde_json::to_string_pretty(self.as_ref()) {
Ok(serialized) => serialized,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename = "Deconvolution", rename_all = "camelCase")]
pub(crate) struct SerializedDeconvolution {
/// The deconvoluted signals.
lorentzians: Vec<Lorentzian>,
/// The smoothing parameters used.
smoothing_settings: SmoothingSettings,
/// The peak selection parameters used.
Expand All @@ -28,18 +26,20 @@ pub(crate) struct SerializedDeconvolution {
fitting_settings: FittingSettings,
/// The mean squared error of the deconvolution.
mse: f64,
/// The deconvoluted signals.
lorentzians: Vec<Lorentzian>,
}

impl<D: AsRef<Deconvolution>> From<D> for SerializedDeconvolution {
fn from(value: D) -> Self {
let deconvolution = value.as_ref();

Self {
lorentzians: deconvolution.lorentzians().to_vec(),
smoothing_settings: deconvolution.smoothing_settings(),
selection_settings: deconvolution.selection_settings(),
fitting_settings: deconvolution.fitting_settings(),
mse: deconvolution.mse(),
lorentzians: deconvolution.lorentzians().to_vec(),
}
}
}
Expand Down
35 changes: 29 additions & 6 deletions metabodecon/src/macros/sim_spectrum.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,43 @@
/// Test utility macro to check if the simulated spectrum was read correctly.
#[macro_export]
#[cfg(test)]
macro_rules! check_sim_spectrum {
($spectrum:expr) => {
assert_eq!($spectrum.chemical_shifts().len(), 2048);
assert_eq!($spectrum.intensities().len(), 2048);
assert_approx_eq!(
f64,
$spectrum.signal_boundaries().0,
3.339007,
epsilon = $crate::CHECK_PRECISION
f64::min(
$spectrum.signal_boundaries().0,
$spectrum.signal_boundaries().1
),
3.339007
);
assert_approx_eq!(
f64,
$spectrum.signal_boundaries().1,
3.553942,
epsilon = $crate::CHECK_PRECISION
f64::max(
$spectrum.signal_boundaries().0,
$spectrum.signal_boundaries().1
),
3.553942
);
assert_eq!(
$spectrum.nucleus(),
$crate::spectrum::meta::Nucleus::Hydrogen1
);
assert_approx_eq!(f64, $spectrum.frequency(), 600.252806949999695);
assert_approx_eq!(
f64,
$spectrum.reference_compound().chemical_shift(),
$spectrum.chemical_shifts()[0]
);
assert_eq!($spectrum.reference_compound().index(), 0);
assert_eq!($spectrum.reference_compound().name(), None);
assert_eq!(
$spectrum
.reference_compound()
.referencing_method(),
None
);
};
}
2 changes: 2 additions & 0 deletions metabodecon/src/macros/thread_safety.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/// Asserts that the given types are `Send`.
#[macro_export]
#[cfg(test)]
macro_rules! assert_send {
($($t:ty),+ $(,)?) => {
$(
Expand All @@ -11,6 +12,7 @@ macro_rules! assert_send {

/// Asserts that the given types are `Sync`.
#[macro_export]
#[cfg(test)]
macro_rules! assert_sync {
($($t:ty),+ $(,)?) => {
$(
Expand Down
1 change: 0 additions & 1 deletion metabodecon/src/spectrum/formats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ mod bruker;
pub use bruker::Bruker;

#[cfg(feature = "jdx")]
#[allow(dead_code, unused_variables)]
mod jcampdx;
#[cfg(feature = "jdx")]
pub use jcampdx::JcampDx;
66 changes: 53 additions & 13 deletions metabodecon/src/spectrum/formats/bruker.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::Result;
use crate::spectrum::Spectrum;
use crate::spectrum::formats::extract_capture;
use crate::spectrum::meta::Nucleus;
use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
use regex::Regex;
use std::fs::{File, read_to_string};
Expand Down Expand Up @@ -151,29 +152,36 @@ enum Type {
struct AcquisitionParameters {
/// The spectral width in ppm.
width: f64,
frequency: f64,
nucleus: Nucleus,
}

/// Regex patterns to search for the acquisition parameters.
static ACQUS_RE: LazyLock<[Regex; 1]> =
LazyLock::new(|| [Regex::new(r"(##\$SW=\s*)(?P<width>\d+(\.\d+)?)").unwrap()]);
static ACQUS_RE: LazyLock<[Regex; 3]> = LazyLock::new(|| {
[
Regex::new(r"(##\$SW=\s*)(?P<width>\d+(\.\d+)?)").unwrap(),
Regex::new(r"(##\$SFO1=\s*)(?P<frequency>\d+(\.\d+)?)").unwrap(),
Regex::new(r"(##\$NUC1=\s*<)(?P<nucleus>\w+)").unwrap(),
]
});

/// Keys used in the acquisition parameter regex patterns, used for error
/// messages
static ACQUS_KEYS: LazyLock<[&str; 1]> = LazyLock::new(|| ["SW"]);
static ACQUS_KEYS: LazyLock<[&str; 3]> = LazyLock::new(|| ["SW", "SFO1", "NUC1"]);

/// Processing parameters extracted from the `procs` file.
#[derive(Debug)]
struct ProcessingParameters {
/// The maximum chemical shift in ppm.
maximum: f64,
/// The size of the data, expected to be 2^15 or 2^17.
data_size: usize,
/// The scaling exponent of the data, if the data is stored as integers.
exponent: i32,
/// The endianness of the data.
endian: Endian,
/// The data type of the raw signal intensities.
data_type: Type,
/// The scaling exponent of the data, if the data is stored as integers.
exponent: i32,
/// The size of the data, expected to be 2^15 or 2^17.
data_size: usize,
}

/// Regex patterns to search for the processing parameters.
Expand Down Expand Up @@ -272,7 +280,9 @@ impl Bruker {
})
.collect();
let intensities = Self::read_one_r(one_r_path, procs)?;
let spectrum = Spectrum::new(chemical_shifts, intensities, signal_boundaries)?;
let mut spectrum = Spectrum::new(chemical_shifts, intensities, signal_boundaries)?;
spectrum.set_nucleus(acqus.nucleus);
spectrum.set_frequency(acqus.frequency);

Ok(spectrum)
}
Expand Down Expand Up @@ -378,8 +388,14 @@ impl Bruker {
let keys = &*ACQUS_KEYS;

let width = extract_capture(&re[0], "width", &acqus, &path, keys[0])?;
let frequency = extract_capture(&re[1], "frequency", &acqus, &path, keys[1])?;
let nucleus = extract_capture(&re[2], "nucleus", &acqus, &path, keys[2])?;

Ok(AcquisitionParameters { width })
Ok(AcquisitionParameters {
width,
frequency,
nucleus,
})
}

/// Internal helper function to read the processing parameters from the
Expand Down Expand Up @@ -410,10 +426,10 @@ impl Bruker {

Ok(ProcessingParameters {
maximum: spectrum_maximum,
data_size,
exponent: scaling_exponent,
endian,
data_type,
exponent: scaling_exponent,
data_size,
})
}

Expand Down Expand Up @@ -449,7 +465,6 @@ impl Bruker {
.as_slice()
.read_i32_into::<BigEndian>(&mut temp)?,
}
temp.reverse();

Ok(temp
.into_iter()
Expand All @@ -466,7 +481,6 @@ impl Bruker {
.as_slice()
.read_f64_into::<BigEndian>(&mut temp)?,
}
temp.reverse();

Ok(temp)
}
Expand Down Expand Up @@ -496,4 +510,30 @@ mod tests {
check_sim_spectrum!(spectrum);
})
}

#[test]
fn read_acquisition_parameters() {
let path = "../data/bruker/blood/blood_01/10/acqus";
let acqus = Bruker::read_acquisition_parameters(path).unwrap();
assert_approx_eq!(f64, acqus.width, 20.0236139622347);
assert_approx_eq!(f64, acqus.frequency, 600.252821089118);
assert_eq!(acqus.nucleus, Nucleus::Hydrogen1);
}

#[test]
fn read_processing_parameters() {
let path = "../data/bruker/blood/blood_01/10/pdata/10/procs";
let procs = Bruker::read_processing_parameters(path).unwrap();
assert_approx_eq!(f64, procs.maximum, 14.81146);
assert_eq!(procs.exponent, 0);
match procs.endian {
Endian::Little => {}
_ => panic!("Expected Little, got {:?}", procs.endian),
};
match procs.data_type {
Type::I32 => {}
Type::F64 => panic!("Expected I32, got F64"),
}
assert_eq!(procs.data_size, 2_usize.pow(17));
}
}
Loading

0 comments on commit c84cee4

Please sign in to comment.