diff --git a/metabodecon-python/metabodecon/_metabodecon.pyi b/metabodecon-python/metabodecon/_metabodecon.pyi index 7676d25..030d872 100644 --- a/metabodecon-python/metabodecon/_metabodecon.pyi +++ b/metabodecon-python/metabodecon/_metabodecon.pyi @@ -104,6 +104,9 @@ class Spectrum: chemical_shifts: np.ndarray intensities: np.ndarray signal_boundaries: tuple[float, float] + nucleus: str + frequency: float + reference_compound: dict def __init__(self, chemical_shifts: np.ndarray, intensities: np.ndarray, signal_boundaries: tuple[float, float]) -> None: diff --git a/metabodecon-python/src/bindings/spectrum.rs b/metabodecon-python/src/bindings/spectrum.rs index b88b5ca..3ceccc2 100644 --- a/metabodecon-python/src/bindings/spectrum.rs +++ b/metabodecon-python/src/bindings/spectrum.rs @@ -3,6 +3,7 @@ use crate::error::SerializationError; use metabodecon::spectrum; use numpy::PyArray1; use pyo3::prelude::*; +use pyo3::types::PyDict; #[pyclass] #[derive(Clone, Debug)] @@ -88,6 +89,36 @@ impl Spectrum { self.inner.signal_boundaries() } + #[getter] + pub fn nucleus(&self) -> String { + self.inner.nucleus().to_string() + } + + #[getter] + pub fn frequency(&self) -> f64 { + self.inner.frequency() + } + + #[getter] + pub fn reference_compound<'py>(&self, py: Python<'py>) -> PyResult> { + let dict = PyDict::new(py); + let reference = self.inner.reference_compound(); + dict.set_item("chemical_shift", reference.chemical_shift())?; + dict.set_item("index", reference.index())?; + match reference.name() { + Some(name) => dict.set_item("name", name)?, + None => dict.set_item("name", "unknown")?, + }; + match reference.referencing_method() { + Some(referencing_method) => { + dict.set_item("referencing_method", referencing_method.to_string())? + } + None => dict.set_item("referencing_method", "unknown")?, + }; + + Ok(dict) + } + #[setter] pub fn set_signal_boundaries(&mut self, signal_boundaries: (f64, f64)) -> PyResult<()> { match self @@ -99,6 +130,41 @@ impl Spectrum { } } + #[setter] + pub fn set_nucleus(&mut self, nucleus: &str) { + self.inner + .set_nucleus(std::str::FromStr::from_str(nucleus).unwrap()); + } + + #[setter] + pub fn set_frequency(&mut self, frequency: f64) { + self.inner.set_frequency(frequency); + } + + #[setter] + pub fn set_reference_compound(&mut self, reference: Bound<'_, PyDict>) -> PyResult<()> { + let reference = reference.as_any(); + let chemical_shift = reference + .get_item("chemical_shift")? + .extract::()?; + let index = reference.get_item("index")?.extract::()?; + let name = match reference.get_item("name") { + Ok(name) => name.extract::().ok(), + Err(_) => None, + }; + let method = match reference.get_item("referencing_method") { + Ok(method) => method + .extract::() + .ok() + .map(|method| std::str::FromStr::from_str(&method).unwrap()), + Err(_) => None, + }; + let reference = spectrum::meta::ReferenceCompound::new(chemical_shift, index, name, method); + self.inner.set_reference_compound(reference); + + Ok(()) + } + pub fn write_json(&self, path: &str) -> PyResult<()> { let serialized = match serde_json::to_string_pretty(self.as_ref()) { Ok(serialized) => serialized, diff --git a/metabodecon/src/deconvolution/serialized_representations/serialized_deconvolution.rs b/metabodecon/src/deconvolution/serialized_representations/serialized_deconvolution.rs index 265cfa3..783e19f 100644 --- a/metabodecon/src/deconvolution/serialized_representations/serialized_deconvolution.rs +++ b/metabodecon/src/deconvolution/serialized_representations/serialized_deconvolution.rs @@ -18,8 +18,6 @@ use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename = "Deconvolution", rename_all = "camelCase")] pub(crate) struct SerializedDeconvolution { - /// The deconvoluted signals. - lorentzians: Vec, /// The smoothing parameters used. smoothing_settings: SmoothingSettings, /// The peak selection parameters used. @@ -28,6 +26,8 @@ pub(crate) struct SerializedDeconvolution { fitting_settings: FittingSettings, /// The mean squared error of the deconvolution. mse: f64, + /// The deconvoluted signals. + lorentzians: Vec, } impl> From for SerializedDeconvolution { @@ -35,11 +35,11 @@ impl> From for SerializedDeconvolution { let deconvolution = value.as_ref(); Self { - lorentzians: deconvolution.lorentzians().to_vec(), smoothing_settings: deconvolution.smoothing_settings(), selection_settings: deconvolution.selection_settings(), fitting_settings: deconvolution.fitting_settings(), mse: deconvolution.mse(), + lorentzians: deconvolution.lorentzians().to_vec(), } } } diff --git a/metabodecon/src/macros/sim_spectrum.rs b/metabodecon/src/macros/sim_spectrum.rs index c2aa9e6..c8ebf07 100644 --- a/metabodecon/src/macros/sim_spectrum.rs +++ b/metabodecon/src/macros/sim_spectrum.rs @@ -1,20 +1,43 @@ /// Test utility macro to check if the simulated spectrum was read correctly. #[macro_export] +#[cfg(test)] macro_rules! check_sim_spectrum { ($spectrum:expr) => { assert_eq!($spectrum.chemical_shifts().len(), 2048); assert_eq!($spectrum.intensities().len(), 2048); assert_approx_eq!( f64, - $spectrum.signal_boundaries().0, - 3.339007, - epsilon = $crate::CHECK_PRECISION + f64::min( + $spectrum.signal_boundaries().0, + $spectrum.signal_boundaries().1 + ), + 3.339007 ); assert_approx_eq!( f64, - $spectrum.signal_boundaries().1, - 3.553942, - epsilon = $crate::CHECK_PRECISION + f64::max( + $spectrum.signal_boundaries().0, + $spectrum.signal_boundaries().1 + ), + 3.553942 + ); + assert_eq!( + $spectrum.nucleus(), + $crate::spectrum::meta::Nucleus::Hydrogen1 + ); + assert_approx_eq!(f64, $spectrum.frequency(), 600.252806949999695); + assert_approx_eq!( + f64, + $spectrum.reference_compound().chemical_shift(), + $spectrum.chemical_shifts()[0] + ); + assert_eq!($spectrum.reference_compound().index(), 0); + assert_eq!($spectrum.reference_compound().name(), None); + assert_eq!( + $spectrum + .reference_compound() + .referencing_method(), + None ); }; } diff --git a/metabodecon/src/macros/thread_safety.rs b/metabodecon/src/macros/thread_safety.rs index f098d82..13137d9 100644 --- a/metabodecon/src/macros/thread_safety.rs +++ b/metabodecon/src/macros/thread_safety.rs @@ -1,5 +1,6 @@ /// Asserts that the given types are `Send`. #[macro_export] +#[cfg(test)] macro_rules! assert_send { ($($t:ty),+ $(,)?) => { $( @@ -11,6 +12,7 @@ macro_rules! assert_send { /// Asserts that the given types are `Sync`. #[macro_export] +#[cfg(test)] macro_rules! assert_sync { ($($t:ty),+ $(,)?) => { $( diff --git a/metabodecon/src/spectrum/formats.rs b/metabodecon/src/spectrum/formats.rs index 8d5cc27..d7e2928 100644 --- a/metabodecon/src/spectrum/formats.rs +++ b/metabodecon/src/spectrum/formats.rs @@ -9,7 +9,6 @@ mod bruker; pub use bruker::Bruker; #[cfg(feature = "jdx")] -#[allow(dead_code, unused_variables)] mod jcampdx; #[cfg(feature = "jdx")] pub use jcampdx::JcampDx; diff --git a/metabodecon/src/spectrum/formats/bruker.rs b/metabodecon/src/spectrum/formats/bruker.rs index 9472b9d..309031e 100644 --- a/metabodecon/src/spectrum/formats/bruker.rs +++ b/metabodecon/src/spectrum/formats/bruker.rs @@ -1,6 +1,7 @@ use crate::Result; use crate::spectrum::Spectrum; use crate::spectrum::formats::extract_capture; +use crate::spectrum::meta::Nucleus; use byteorder::{BigEndian, LittleEndian, ReadBytesExt}; use regex::Regex; use std::fs::{File, read_to_string}; @@ -151,29 +152,36 @@ enum Type { struct AcquisitionParameters { /// The spectral width in ppm. width: f64, + frequency: f64, + nucleus: Nucleus, } /// Regex patterns to search for the acquisition parameters. -static ACQUS_RE: LazyLock<[Regex; 1]> = - LazyLock::new(|| [Regex::new(r"(##\$SW=\s*)(?P\d+(\.\d+)?)").unwrap()]); +static ACQUS_RE: LazyLock<[Regex; 3]> = LazyLock::new(|| { + [ + Regex::new(r"(##\$SW=\s*)(?P\d+(\.\d+)?)").unwrap(), + Regex::new(r"(##\$SFO1=\s*)(?P\d+(\.\d+)?)").unwrap(), + Regex::new(r"(##\$NUC1=\s*<)(?P\w+)").unwrap(), + ] +}); /// Keys used in the acquisition parameter regex patterns, used for error /// messages -static ACQUS_KEYS: LazyLock<[&str; 1]> = LazyLock::new(|| ["SW"]); +static ACQUS_KEYS: LazyLock<[&str; 3]> = LazyLock::new(|| ["SW", "SFO1", "NUC1"]); /// Processing parameters extracted from the `procs` file. #[derive(Debug)] struct ProcessingParameters { /// The maximum chemical shift in ppm. maximum: f64, - /// The size of the data, expected to be 2^15 or 2^17. - data_size: usize, + /// The scaling exponent of the data, if the data is stored as integers. + exponent: i32, /// The endianness of the data. endian: Endian, /// The data type of the raw signal intensities. data_type: Type, - /// The scaling exponent of the data, if the data is stored as integers. - exponent: i32, + /// The size of the data, expected to be 2^15 or 2^17. + data_size: usize, } /// Regex patterns to search for the processing parameters. @@ -272,7 +280,9 @@ impl Bruker { }) .collect(); let intensities = Self::read_one_r(one_r_path, procs)?; - let spectrum = Spectrum::new(chemical_shifts, intensities, signal_boundaries)?; + let mut spectrum = Spectrum::new(chemical_shifts, intensities, signal_boundaries)?; + spectrum.set_nucleus(acqus.nucleus); + spectrum.set_frequency(acqus.frequency); Ok(spectrum) } @@ -378,8 +388,14 @@ impl Bruker { let keys = &*ACQUS_KEYS; let width = extract_capture(&re[0], "width", &acqus, &path, keys[0])?; + let frequency = extract_capture(&re[1], "frequency", &acqus, &path, keys[1])?; + let nucleus = extract_capture(&re[2], "nucleus", &acqus, &path, keys[2])?; - Ok(AcquisitionParameters { width }) + Ok(AcquisitionParameters { + width, + frequency, + nucleus, + }) } /// Internal helper function to read the processing parameters from the @@ -410,10 +426,10 @@ impl Bruker { Ok(ProcessingParameters { maximum: spectrum_maximum, - data_size, + exponent: scaling_exponent, endian, data_type, - exponent: scaling_exponent, + data_size, }) } @@ -449,7 +465,6 @@ impl Bruker { .as_slice() .read_i32_into::(&mut temp)?, } - temp.reverse(); Ok(temp .into_iter() @@ -466,7 +481,6 @@ impl Bruker { .as_slice() .read_f64_into::(&mut temp)?, } - temp.reverse(); Ok(temp) } @@ -496,4 +510,30 @@ mod tests { check_sim_spectrum!(spectrum); }) } + + #[test] + fn read_acquisition_parameters() { + let path = "../data/bruker/blood/blood_01/10/acqus"; + let acqus = Bruker::read_acquisition_parameters(path).unwrap(); + assert_approx_eq!(f64, acqus.width, 20.0236139622347); + assert_approx_eq!(f64, acqus.frequency, 600.252821089118); + assert_eq!(acqus.nucleus, Nucleus::Hydrogen1); + } + + #[test] + fn read_processing_parameters() { + let path = "../data/bruker/blood/blood_01/10/pdata/10/procs"; + let procs = Bruker::read_processing_parameters(path).unwrap(); + assert_approx_eq!(f64, procs.maximum, 14.81146); + assert_eq!(procs.exponent, 0); + match procs.endian { + Endian::Little => {} + _ => panic!("Expected Little, got {:?}", procs.endian), + }; + match procs.data_type { + Type::I32 => {} + Type::F64 => panic!("Expected I32, got F64"), + } + assert_eq!(procs.data_size, 2_usize.pow(17)); + } } diff --git a/metabodecon/src/spectrum/formats/jcampdx.rs b/metabodecon/src/spectrum/formats/jcampdx.rs index c90b5be..5d2b5f2 100644 --- a/metabodecon/src/spectrum/formats/jcampdx.rs +++ b/metabodecon/src/spectrum/formats/jcampdx.rs @@ -2,7 +2,7 @@ use crate::Result; use crate::spectrum::Spectrum; use crate::spectrum::error::{Error, Kind}; use crate::spectrum::formats::{extract_capture, extract_row}; -use crate::spectrum::meta::{Nucleus, ReferenceCompound, ReferencingMethod}; +use crate::spectrum::meta::{Nucleus, ReferenceCompound}; use regex::{Captures, Regex}; use std::fs::read_to_string; use std::path::Path; @@ -365,6 +365,9 @@ enum XUnits { #[derive(Debug)] struct Header { /// The type of data (processed Spectrum or raw FID). + /// + /// Currently unused, as only Spectrum is supported. + #[allow(dead_code)] data_type: DataType, /// The data format (NTuples or XYData). format: Format, @@ -379,7 +382,7 @@ struct Header { /// Regex patterns to search for the header metadata. static HEADER_RE: LazyLock<[Regex; 11]> = LazyLock::new(|| { [ - Regex::new(r"(##JCAMPDX=\s*)(?P\d+(\.\d+)?)").unwrap(), + Regex::new(r"(##JCAMP(\s*|_|-)DX=\s*)(?P\d+(\.\d+)?)").unwrap(), Regex::new(r"(##DATA(\s|_)TYPE=\s*)(?P\w+\s\w+)").unwrap(), Regex::new(r"(##DATA(\s|_)CLASS=\s*)(?P\w+(\s\w+)?)").unwrap(), Regex::new(r"(##\.OBSERVE(\s|_)FREQUENCY=\s*)(?P\d+(\.\d+)?)").unwrap(), @@ -544,11 +547,8 @@ impl JcampDx { XUnits::Ppm => 1.0, }; let step = (block.last - block.first) * conversion / (block.data_size as f64 - 1.0); - let offset = match header.reference_compound { - Some(reference) => match reference.index() { - Some(index) => reference.chemical_shift() - index as f64 * step, - None => block.first * conversion, - }, + let offset = match &header.reference_compound { + Some(reference) => reference.chemical_shift() - reference.index() as f64 * step, None => block.first * conversion, }; let chemical_shifts = (0..block.data_size) @@ -558,7 +558,12 @@ impl JcampDx { true => Self::decode_asdf(&block.data, block.factor, path)?, false => Self::decode_affn(&block.data, block.factor, path)?, }; - let spectrum = Spectrum::new(chemical_shifts, intensities, signal_boundaries)?; + let mut spectrum = Spectrum::new(chemical_shifts, intensities, signal_boundaries)?; + spectrum.set_nucleus(header.nucleus); + spectrum.set_frequency(header.frequency); + if let Some(reference) = header.reference_compound { + spectrum.set_reference_compound(reference); + } Ok(spectrum) } @@ -594,47 +599,21 @@ impl JcampDx { "NTUPLES" => Format::NTuples, _ => return Err(Error::new(Kind::UnsupportedJcampDxFile).into()), }; - let frequency = extract_capture::(&re[3], "frequency", dx, &path, keys[3])?; - let nucleus = match extract_capture::(&re[4], "nucleus", dx, &path, keys[4])? - .to_uppercase() - .as_str() - { - "^1H" => Nucleus::Hydrogen1, - "^13C" => Nucleus::Carbon13, - "^15N" => Nucleus::Nitrogen15, - "^19F" => Nucleus::Fluorine19, - "^29SI" => Nucleus::Silicon29, - "^31P" => Nucleus::Phosphorus31, - name => Nucleus::Other(name.to_string()), - }; + let frequency = extract_capture(&re[3], "frequency", dx, &path, keys[3])?; + let nucleus = extract_capture(&re[4], "nucleus", dx, &path, keys[4])?; let reference_compound = { - let method = extract_capture::(&re[7], "method", dx, &path, keys[7]).ok(); + let method = extract_capture(&re[7], "method", dx, &path, keys[7]).ok(); let name = extract_capture::(&re[8], "name", dx, &path, keys[8]).ok(); - let index = extract_capture(&re[9], "index", dx, &path, keys[9]).ok(); + let index = extract_capture::(&re[9], "index", dx, &path, keys[9]).ok(); let shift = extract_capture(&re[10], "shift", dx, &path, keys[10]).ok(); if let (Some(shift), Some(index)) = (shift, index) { - let referencing_method = match method.as_deref() { - Some("INTERNAL") => Some(ReferencingMethod::Internal), - Some("EXTERNAL") => Some(ReferencingMethod::External), - _ => None, - }; - - Some(ReferenceCompound::new( - shift, - name, - Some(index), - referencing_method, - )) + Some(ReferenceCompound::new(shift, index - 1, name, method)) } else { let name = extract_capture::(&re[5], "name", dx, &path, keys[5]).ok(); let shift = extract_capture(&re[6], "shift", dx, &path, keys[6]).ok(); - if let Some(shift) = shift { - Some(ReferenceCompound::new(shift, name, Some(0), None)) - } else { - name.map(|name| ReferenceCompound::new(0.0, Some(name), None, None)) - } + shift.map(|shift| ReferenceCompound::new(shift, 0, name, None)) } }; @@ -864,16 +843,18 @@ impl JcampDx { let data = re[2].replace_all(&data, |captures: &Captures| { Self::undo_sqz(captures.name("sqz").unwrap().as_str()) }); - let mut data = re[3].replace_all(&data, |captures: &Captures| { - let dif = captures.name("dif").unwrap().as_str(); - let dup = captures.name("dup").unwrap().as_str(); - let next = captures.name("next").unwrap().as_str(); + let mut data = re[3] + .replace_all(&data, |captures: &Captures| { + let dif = captures.name("dif").unwrap().as_str(); + let dup = captures.name("dup").unwrap().as_str(); + let next = captures.name("next").unwrap().as_str(); - match dup { - "" | "S" => format!(" \n{}", next), - _ => format!(" {} {} \n{}", dif, Self::decrement_dup(dup), next), - } - }).to_string(); + match dup { + "" | "S" => format!(" \n{}", next), + _ => format!(" {} {} \n{}", dif, Self::decrement_dup(dup), next), + } + }) + .to_string(); loop { let tmp_data_dif = re[4].replace_all(&data, |captures: &Captures| { let value = captures.name("val").unwrap().as_str(); @@ -1011,7 +992,10 @@ impl JcampDx { '7' => "Y", '8' => "Z", '9' => "s", - _ => unreachable!("Non-numeric leading character in parsed usize: {}", decremented), + _ => unreachable!( + "Non-numeric leading character in parsed usize: {}", + decremented + ), } .to_string(); encoded.extend(decremented.chars().skip(1)); @@ -1028,31 +1012,31 @@ mod tests { #[test] fn read_affn_spectrum() { let path = "../data/jcamp-dx/BRUKAFFN.dx"; - let spectrum = JcampDx::read_spectrum(path, (20.0, 220.0)).unwrap(); + JcampDx::read_spectrum(path, (20.0, 220.0)).unwrap(); } #[test] fn read_pac_spectrum() { let path = "../data/jcamp-dx/BRUKPAC.dx"; - let spectrum = JcampDx::read_spectrum(path, (20.0, 220.0)).unwrap(); + JcampDx::read_spectrum(path, (20.0, 220.0)).unwrap(); } #[test] fn read_sqz_spectrum() { let path = "../data/jcamp-dx/BRUKSQZ.dx"; - let spectrum = JcampDx::read_spectrum(path, (20.0, 220.0)).unwrap(); + JcampDx::read_spectrum(path, (20.0, 220.0)).unwrap(); } #[test] fn read_dif_dup_spectrum() { let path = "../data/jcamp-dx/BRUKDIF.dx"; - let spectrum = JcampDx::read_spectrum(path, (20.0, 220.0)).unwrap(); + JcampDx::read_spectrum(path, (20.0, 220.0)).unwrap(); } #[test] fn read_ntuples_spectrum() { let path = "../data/jcamp-dx/BRUKNTUP.dx"; - let spectrum = JcampDx::read_spectrum(path, (20.0, 220.0)).unwrap(); + JcampDx::read_spectrum(path, (20.0, 220.0)).unwrap(); } #[test] @@ -1068,10 +1052,7 @@ mod tests { Format::NTuples => (), }; assert_approx_eq!(f64, header.frequency, 100.4); - match header.nucleus { - Nucleus::Carbon13 => (), - _ => panic!("Expected Carbon13"), - }; + assert_eq!(header.nucleus, Nucleus::Carbon13); if header.reference_compound.is_some() { panic!("Expected None"); } diff --git a/metabodecon/src/spectrum/meta/nucleus.rs b/metabodecon/src/spectrum/meta/nucleus.rs index 1f017b6..5c51a76 100644 --- a/metabodecon/src/spectrum/meta/nucleus.rs +++ b/metabodecon/src/spectrum/meta/nucleus.rs @@ -1,7 +1,13 @@ +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +use std::fmt::Formatter; + /// The NMR nucleus. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, PartialEq, Default)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum Nucleus { /// Proton NMR. + #[default] Hydrogen1, /// Carbon-13 NMR. Carbon13, @@ -18,3 +24,44 @@ pub enum Nucleus { /// This should most likely just be treated as a malformed field. Other(String), } + +impl std::str::FromStr for Nucleus { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + let value = s + .trim() + .replace(" ", "") + .replace("^", "") + .replace("-", "") + .replace("_", "") + .as_str() + .to_uppercase(); + let nucleus = match value.as_str() { + "1H" | "PROTON" | "HYDROGEN1" => Nucleus::Hydrogen1, + "13C" | "CARBON13" => Nucleus::Carbon13, + "15N" | "NITROGEN15" => Nucleus::Nitrogen15, + "19F" | "FLUORINE19" => Nucleus::Fluorine19, + "29SI" | "SILICON29" => Nucleus::Silicon29, + "31P" | "PHOSPHORUS31" => Nucleus::Phosphorus31, + _ => Nucleus::Other(value), + }; + + Ok(nucleus) + } +} + +impl std::fmt::Display for Nucleus { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let nucleus = match self { + Nucleus::Hydrogen1 => "1H", + Nucleus::Carbon13 => "13C", + Nucleus::Nitrogen15 => "15N", + Nucleus::Fluorine19 => "19F", + Nucleus::Silicon29 => "29Si", + Nucleus::Phosphorus31 => "31P", + Nucleus::Other(value) => value.as_str(), + }; + write!(f, "{}", nucleus) + } +} diff --git a/metabodecon/src/spectrum/meta/reference.rs b/metabodecon/src/spectrum/meta/reference.rs index ef9713c..0939518 100644 --- a/metabodecon/src/spectrum/meta/reference.rs +++ b/metabodecon/src/spectrum/meta/reference.rs @@ -1,5 +1,9 @@ +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + /// The referencing method used in the NMR experiment. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum ReferencingMethod { /// An internal reference was used. Internal, @@ -7,52 +11,121 @@ pub enum ReferencingMethod { External, } +impl std::str::FromStr for ReferencingMethod { + type Err = String; + + fn from_str(s: &str) -> Result { + let value = s.trim().to_uppercase(); + let method = match value.as_str() { + "INTERNAL" => Self::Internal, + "EXTERNAL" => Self::External, + _ => return Err(format!("invalid referencing method: {}", s)), + }; + + Ok(method) + } +} + +impl std::fmt::Display for ReferencingMethod { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let method = match self { + Self::Internal => "internal", + Self::External => "external", + }; + write!(f, "{}", method) + } +} + /// The reference compound used in the NMR experiment. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] +#[cfg_attr( + feature = "serde", + derive(Serialize, Deserialize), + serde(rename_all = "camelCase") +)] pub struct ReferenceCompound { /// The chemical shift of the reference compound in ppm. chemical_shift: f64, + /// The index of the reference compound in the NMR experiment. + index: usize, /// The name of the reference compound. name: Option, - /// The index of the reference compound in the NMR experiment. - index: Option, /// The referencing method used in the NMR experiment. referencing_method: Option, } +impl From for ReferenceCompound { + fn from(value: f64) -> Self { + Self { + chemical_shift: value, + ..Default::default() + } + } +} + +impl From<(f64, usize)> for ReferenceCompound { + fn from(value: (f64, usize)) -> Self { + Self { + chemical_shift: value.0, + index: value.1, + ..Default::default() + } + } +} + impl ReferenceCompound { /// Creates a new `ReferenceCompound`. pub fn new( chemical_shift: f64, + index: usize, name: Option, - index: Option, referencing_method: Option, ) -> Self { Self { chemical_shift, - name, index, + name, referencing_method, } } - /// Returns the name of the reference compound. - pub fn name(&self) -> Option<&str> { - self.name.as_deref() - } - /// Returns the chemical shift of the reference compound. pub fn chemical_shift(&self) -> f64 { self.chemical_shift } /// Returns the index of the reference compound. - pub fn index(&self) -> Option { + pub fn index(&self) -> usize { self.index } + /// Returns the name of the reference compound. + pub fn name(&self) -> Option<&str> { + self.name.as_deref() + } + /// Returns the referencing method used in the NMR experiment. pub fn referencing_method(&self) -> Option { self.referencing_method } + + /// Sets the chemical shift of the reference compound. + pub fn set_chemical_shift(&mut self, chemical_shift: f64) { + self.chemical_shift = chemical_shift; + } + + /// Sets the index of the reference compound. + pub fn set_index(&mut self, index: usize) { + self.index = index; + } + + /// Sets the name of the reference compound. + pub fn set_name(&mut self, name: Option) { + self.name = name; + } + + /// Sets the referencing method used in the NMR experiment. + pub fn set_referencing_method(&mut self, referencing_method: Option) { + self.referencing_method = referencing_method; + } } diff --git a/metabodecon/src/spectrum/serialized_spectrum.rs b/metabodecon/src/spectrum/serialized_spectrum.rs index 0d627fc..0463d5a 100644 --- a/metabodecon/src/spectrum/serialized_spectrum.rs +++ b/metabodecon/src/spectrum/serialized_spectrum.rs @@ -1,4 +1,5 @@ use crate::spectrum::Spectrum; +use crate::spectrum::meta::{Nucleus, ReferenceCompound}; use crate::{Error, Result}; use serde::{Deserialize, Serialize}; @@ -14,29 +15,34 @@ use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename = "Spectrum", rename_all = "camelCase")] pub(crate) struct SerializedSpectrum { - /// The intensities in arbitrary units. - intensities: Vec, - /// The number of data points in the spectrum. - size: usize, /// The spectrum boundaries in ppm. spectrum_boundaries: (f64, f64), /// The boundaries of the signal region. signal_boundaries: (f64, f64), + /// The number of data points in the spectrum. + size: usize, + /// The observed nucleus. + nucleus: Nucleus, + /// The spectrometer frequency in MHz. + frequency: f64, + /// The chemical shift reference. + reference_compound: ReferenceCompound, + /// The intensities in arbitrary units. + intensities: Vec, } impl> From for SerializedSpectrum { fn from(value: S) -> Self { let spectrum = value.as_ref(); - let intensities = spectrum.intensities().to_vec(); - let size = spectrum.len(); - let spectrum_boundaries = spectrum.range(); - let signal_boundaries = spectrum.signal_boundaries(); Self { - intensities, - size, - spectrum_boundaries, - signal_boundaries, + intensities: spectrum.intensities().to_vec(), + size: spectrum.len(), + spectrum_boundaries: spectrum.range(), + signal_boundaries: spectrum.signal_boundaries(), + nucleus: spectrum.nucleus(), + frequency: spectrum.frequency(), + reference_compound: spectrum.reference_compound().clone(), } } } @@ -53,7 +59,10 @@ impl TryFrom for Spectrum { let chemical_shifts = (0..size) .map(|index| start + index as f64 * step) .collect(); - let spectrum = Spectrum::new(chemical_shifts, intensities, signal_boundaries)?; + let mut spectrum = Spectrum::new(chemical_shifts, intensities, signal_boundaries)?; + spectrum.set_nucleus(value.nucleus); + spectrum.set_frequency(value.frequency); + spectrum.set_reference_compound(value.reference_compound); Ok(spectrum) } @@ -80,7 +89,6 @@ impl PartialEq for SerializedSpectrum { impl SerializedSpectrum { /// Creates a valid `SerializedSpectrum` with 2^n resolution for testing. fn valid(resolution: u32) -> Self { - let spectrum_boundaries = (0.0, 10.0); let intensities = (0..2_u32.pow(resolution)) .map(|i| i as f64 * 10.0 / (2_f64.powi(resolution as i32) - 1.0)) .map(|x| { @@ -88,13 +96,15 @@ impl SerializedSpectrum { + 1.0 * 0.25 / (0.25_f64.powi(2) + (x - 7.0).powi(2)) }) .collect(); - let signal_boundaries = (1.0, 9.0); Self { intensities, size: 2_usize.pow(resolution), - spectrum_boundaries, - signal_boundaries, + spectrum_boundaries: (0.0, 10.0), + signal_boundaries: (1.0, 9.0), + nucleus: Nucleus::Hydrogen1, + frequency: 400.0, + reference_compound: ReferenceCompound::default(), } } } diff --git a/metabodecon/src/spectrum/spectrum.rs b/metabodecon/src/spectrum/spectrum.rs index 0f8d887..b92739e 100644 --- a/metabodecon/src/spectrum/spectrum.rs +++ b/metabodecon/src/spectrum/spectrum.rs @@ -1,6 +1,6 @@ use crate::Result; use crate::spectrum::error::{Error, Kind}; -use crate::spectrum::meta::Monotonicity; +use crate::spectrum::meta::{Monotonicity, Nucleus, ReferenceCompound}; use std::sync::Arc; #[cfg(feature = "serde")] @@ -10,8 +10,9 @@ use serde::{Deserialize, Serialize}; /// Data structure that represents a 1D NMR spectrum. /// -/// `Spectrum` is a fixed-size, read-only container that holds the chemical -/// shifts, signal intensities, and metadata of a 1D NMR spectrum. +/// `Spectrum` is a fixed-size, container that holds the chemical shifts, signal +/// intensities, and metadata of a 1D NMR spectrum. The data itself is read-only +/// but may be modified through the metadata (e.g. the reference). /// /// # Invariants /// @@ -101,6 +102,12 @@ pub struct Spectrum { intensities: Arc<[f64]>, /// The boundaries of the signal region. signal_boundaries: (f64, f64), + /// The observed nucleus. + nucleus: Nucleus, + /// The spectrometer frequency in MHz. + frequency: f64, + /// The chemical shift reference. + reference_compound: ReferenceCompound, /// The monotonicity of the data. monotonicity: Monotonicity, } @@ -178,11 +185,15 @@ impl Spectrum { let monotonicity = Monotonicity::from_f64s(chemical_shifts[0], chemical_shifts[1]).unwrap(); let signal_boundaries = Self::validate_boundaries(monotonicity, &chemical_shifts, signal_boundaries)?; + let first = chemical_shifts[0]; Ok(Self { chemical_shifts: chemical_shifts.into(), intensities: intensities.into(), signal_boundaries, + nucleus: Nucleus::default(), + frequency: 1.0, + reference_compound: first.into(), monotonicity, }) } @@ -263,6 +274,90 @@ impl Spectrum { self.signal_boundaries } + /// Returns the observed nucleus of the `Spectrum`. + /// + /// By default, this is set to [`Hydrogen1`]. + /// + /// [`Hydrogen1`]: Nucleus::Hydrogen1 + /// + /// # Example + /// + /// ``` + /// use metabodecon::spectrum::Spectrum; + /// use metabodecon::spectrum::meta::Nucleus; + /// + /// # fn main() -> metabodecon::Result<()> { + /// let spectrum = Spectrum::new( + /// vec![1.0, 2.0, 3.0], // Chemical shifts + /// vec![1.0, 2.0, 3.0], // Intensities + /// (1.0, 3.0), // Signal boundaries + /// )?; + /// + /// assert_eq!(spectrum.nucleus(), Nucleus::Hydrogen1); + /// # Ok(()) + /// # } + /// ``` + pub fn nucleus(&self) -> Nucleus { + self.nucleus.clone() + } + + /// Returns the spectrometer frequency of the `Spectrum` in MHz. + /// + /// By default, this is set to `1.0`. + /// + /// # Example + /// + /// ``` + /// use float_cmp::assert_approx_eq; + /// use metabodecon::spectrum::Spectrum; + /// + /// # fn main() -> metabodecon::Result<()> { + /// let spectrum = Spectrum::new( + /// vec![1.0, 2.0, 3.0], // Chemical shifts + /// vec![1.0, 2.0, 3.0], // Intensities + /// (1.0, 3.0), // Signal boundaries + /// )?; + /// + /// assert_approx_eq!(f64, spectrum.frequency(), 1.0); + /// # Ok(()) + /// # } + /// ``` + pub fn frequency(&self) -> f64 { + self.frequency + } + + /// Returns the chemical shift reference of the `Spectrum`. + /// + /// By default, this is set to the first chemical shift, with no name or + /// [`ReferencingMethod`]. + /// + /// [`ReferencingMethod`]: crate::spectrum::meta::ReferencingMethod + /// + /// # Example + /// + /// ``` + /// use float_cmp::assert_approx_eq; + /// use metabodecon::spectrum::Spectrum; + /// + /// # fn main() -> metabodecon::Result<()> { + /// let spectrum = Spectrum::new( + /// vec![1.0, 2.0, 3.0], // Chemical shifts + /// vec![1.0, 2.0, 3.0], // Intensities + /// (1.0, 3.0), // Signal boundaries + /// )?; + /// let reference = spectrum.reference_compound(); + /// + /// assert_approx_eq!(f64, reference.chemical_shift(), 1.0); + /// assert_eq!(reference.index(), 0); + /// assert_eq!(reference.name(), None); + /// assert_eq!(reference.referencing_method(), None); + /// # Ok(()) + /// # } + /// ``` + pub fn reference_compound(&self) -> ReferenceCompound { + self.reference_compound.clone() + } + /// Returns the monotonicity of the `Spectrum`. /// /// # Example @@ -328,6 +423,124 @@ impl Spectrum { Ok(()) } + /// Sets the observed nucleus of the `Spectrum`. + /// + /// This has no effect on the data itself. + /// + /// # Example + /// + /// ``` + /// use metabodecon::spectrum::Spectrum; + /// use metabodecon::spectrum::meta::Nucleus; + /// + /// # fn main() -> metabodecon::Result<()> { + /// let mut spectrum = Spectrum::new( + /// vec![1.0, 2.0, 3.0], // Chemical shifts + /// vec![1.0, 2.0, 3.0], // Intensities + /// (1.0, 3.0), // Signal boundaries + /// )?; + /// spectrum.set_nucleus(Nucleus::Carbon13); + /// + /// assert_eq!(spectrum.nucleus(), Nucleus::Carbon13); + /// # Ok(()) + /// # } + /// ``` + pub fn set_nucleus(&mut self, nucleus: Nucleus) { + self.nucleus = nucleus; + } + + /// Sets the spectrometer frequency of the `Spectrum` in MHz. + /// + /// This has no effect on the data itself, even though the chemical shifts + /// depend on it. The frequency field is only used as metadata. + /// + /// # Example + /// + /// ``` + /// use metabodecon::spectrum::Spectrum; + /// use metabodecon::spectrum::meta::Nucleus; + /// + /// # fn main() -> metabodecon::Result<()> { + /// use float_cmp::assert_approx_eq; + /// let mut spectrum = Spectrum::new( + /// vec![1.0, 2.0, 3.0], // Chemical shifts + /// vec![1.0, 2.0, 3.0], // Intensities + /// (1.0, 3.0), // Signal boundaries + /// )?; + /// spectrum.set_frequency(2.0); + /// + /// assert_approx_eq!(f64, spectrum.chemical_shifts()[0], 1.0); + /// assert_approx_eq!(f64, spectrum.chemical_shifts()[1], 2.0); + /// assert_approx_eq!(f64, spectrum.chemical_shifts()[2], 3.0); + /// # Ok(()) + /// # } + /// ``` + pub fn set_frequency(&mut self, frequency: f64) { + self.frequency = frequency; + } + + /// Sets the reference compound of the `Spectrum`. + /// + /// The reference compound is used to set the chemical shift reference of + /// the `Spectrum`. The chemical shifts are adjusted such that the reference + /// compound is at the specified chemical shift. + /// + /// [`ReferenceCompound`] implements `From` and `From<(f64, usize)>` to + /// allow for easy conversion from a chemical shift or a chemical shift and + /// index pair. In the former case, the index is set to `0`, meaning that + /// the leftmost chemical shift is the reference. In the latter case, the + /// chemical shift at the index will be equal to the reference. + /// + /// # Example + /// + /// ``` + /// use metabodecon::spectrum::Spectrum; + /// use metabodecon::spectrum::meta::{Nucleus, ReferenceCompound}; + /// + /// # fn main() -> metabodecon::Result<()> { + /// use float_cmp::assert_approx_eq; + /// let mut spectrum = Spectrum::new( + /// vec![1.0, 2.0, 3.0], // Chemical shifts + /// vec![1.0, 2.0, 3.0], // Intensities + /// (1.0, 3.0), // Signal boundaries + /// )?; + /// + /// spectrum.set_reference_compound(10.0); + /// assert_approx_eq!(f64, spectrum.chemical_shifts()[0], 10.0); // Reference + /// assert_approx_eq!(f64, spectrum.chemical_shifts()[1], 11.0); + /// assert_approx_eq!(f64, spectrum.chemical_shifts()[2], 12.0); + /// + /// spectrum.set_reference_compound((20.0, 1)); + /// assert_approx_eq!(f64, spectrum.chemical_shifts()[0], 19.0); + /// assert_approx_eq!(f64, spectrum.chemical_shifts()[1], 20.0); // Reference + /// assert_approx_eq!(f64, spectrum.chemical_shifts()[2], 21.0); + /// + /// let name = Some("H2O".to_string()); + /// let reference = ReferenceCompound::new(4.8, 2, name, None); + /// spectrum.set_reference_compound(reference); + /// assert_approx_eq!(f64, spectrum.chemical_shifts()[0], 2.8); + /// assert_approx_eq!(f64, spectrum.chemical_shifts()[1], 3.8); + /// assert_approx_eq!(f64, spectrum.chemical_shifts()[2], 4.8); // Reference + /// // + /// # Ok(()) + /// # } + /// ``` + pub fn set_reference_compound>(&mut self, reference: T) { + let first_before = self.chemical_shifts[0]; + let step = self.step(); + let reference = reference.into(); + let offset = reference.chemical_shift() - reference.index() as f64 * step; + self.chemical_shifts = (0..self.len()) + .map(|i| offset + (i as f64) * step) + .collect(); + let first_after = self.chemical_shifts[0]; + self.signal_boundaries = ( + self.signal_boundaries.0 + first_after - first_before, + self.signal_boundaries.1 + first_after - first_before, + ); + self.reference_compound = reference; + } + /// Returns the number of chemical shift-intensity pairs in the `Spectrum`. /// /// # Example