Skip to content

Commit

Permalink
Add DWT denoiser
Browse files Browse the repository at this point in the history
  • Loading branch information
franckalbinet committed Jan 23, 2025
1 parent 0c888d7 commit efa8ec7
Show file tree
Hide file tree
Showing 8 changed files with 221 additions and 21 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ Implemented transforms developed so far include:

- **Smoothing**:

- [ ] `WaveletDenoise`: Wavelet denoising
- [x]
[`WaveletDenoise`](https://franckalbinet.github.io/soilspectfm/core.html#waveletdenoise):
Wavelet denoising
- [ ] `SavGolSmooth`: Savitzky-Golay smoothing

- **Other transformations**:
Expand Down
102 changes: 93 additions & 9 deletions nbs/00_core.ipynb

Large diffs are not rendered by default.

42 changes: 41 additions & 1 deletion nbs/02.utils.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
"outputs": [],
"source": [
"#| exports\n",
"toy_mir_url = 'https://gist.githubusercontent.com/franckalbinet/a7476d0413e88bcc162c553e43f182fa/raw/a45dc573eec558b019909bdd1830fbb207511fcb/mir-spectra-sample.csv'"
"toy_mir_url = 'https://gist.githubusercontent.com/franckalbinet/a7476d0413e88bcc162c553e43f182fa/raw/a45dc573eec558b019909bdd1830fbb207511fcb/mir-spectra-sample.csv'\n",
"toy_noisy_mir_url = 'https://gist.githubusercontent.com/franckalbinet/3e4e16f592175edad724c22841eb88dd/raw/c6a3f83c0146fbdc826aa4e9c1c448120a281a2e/rt_mir_bruker.csv'"
]
},
{
Expand Down Expand Up @@ -82,6 +83,45 @@
"X, ws = load_toy_mir()\n",
"print(f'X shape: {X.shape}, First 5 wavenumbers: {ws[:5]}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| exports\n",
"def load_toy_noisy_mir():\n",
" \"\"\"\n",
" Load the toy noisy MIR dataset.\n",
" \n",
" Scans acquired in the context of a K spiking experiment: In-progress publication.\n",
" \"\"\"\n",
" df = pd.read_csv(toy_noisy_mir_url)\n",
" sample_ids = df.sample_id.values\n",
" wavenumbers = df.columns[1:].to_numpy().astype(float)\n",
" spectra = df.iloc[:, 1:].to_numpy()\n",
" return spectra, wavenumbers, sample_ids"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"X shape: (48, 3315), First 5 wavenumbers: [599.91153162 600.93702142 601.96251122 602.98800102 604.01349081]\n"
]
}
],
"source": [
"#| eval: false\n",
"X, wns, smp_id = load_toy_noisy_mir()\n",
"print(f'X shape: {X.shape}, First 5 wavenumbers: {wns[:5]}')"
]
}
],
"metadata": {
Expand Down
2 changes: 1 addition & 1 deletion nbs/index.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@
" - [ ] `GapSegmentDerivative`: ...\n",
"\n",
"- **Smoothing**:\n",
" - [ ] `WaveletDenoise`: Wavelet denoising\n",
" - [x] `WaveletDenoise`: Wavelet denoising\n",
" - [ ] `SavGolSmooth`: Savitzky-Golay smoothing\n",
" \n",
"\n",
Expand Down
2 changes: 1 addition & 1 deletion settings.ini
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ status = 3
user = franckalbinet

### Optional ###
requirements = fastcore scikit-learn matplotlib
requirements = fastcore scikit-learn matplotlib PyWavelets
dev_requirements = soilspecdata nbdev ipykernel
# dev_requirements =
# console_scripts =
12 changes: 10 additions & 2 deletions soilspectfm/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,16 @@
'soilspectfm.core.ToAbsorbance': ('core.html#toabsorbance', 'soilspectfm/core.py'),
'soilspectfm.core.ToAbsorbance.__init__': ('core.html#toabsorbance.__init__', 'soilspectfm/core.py'),
'soilspectfm.core.ToAbsorbance.fit': ('core.html#toabsorbance.fit', 'soilspectfm/core.py'),
'soilspectfm.core.ToAbsorbance.transform': ('core.html#toabsorbance.transform', 'soilspectfm/core.py')},
'soilspectfm.utils': {'soilspectfm.utils.load_toy_mir': ('02.utils.html#load_toy_mir', 'soilspectfm/utils.py')},
'soilspectfm.core.ToAbsorbance.transform': ('core.html#toabsorbance.transform', 'soilspectfm/core.py'),
'soilspectfm.core.WaveletDenoise': ('core.html#waveletdenoise', 'soilspectfm/core.py'),
'soilspectfm.core.WaveletDenoise.__init__': ('core.html#waveletdenoise.__init__', 'soilspectfm/core.py'),
'soilspectfm.core.WaveletDenoise._denoise_single': ( 'core.html#waveletdenoise._denoise_single',
'soilspectfm/core.py'),
'soilspectfm.core.WaveletDenoise.fit': ('core.html#waveletdenoise.fit', 'soilspectfm/core.py'),
'soilspectfm.core.WaveletDenoise.transform': ( 'core.html#waveletdenoise.transform',
'soilspectfm/core.py')},
'soilspectfm.utils': { 'soilspectfm.utils.load_toy_mir': ('02.utils.html#load_toy_mir', 'soilspectfm/utils.py'),
'soilspectfm.utils.load_toy_noisy_mir': ('02.utils.html#load_toy_noisy_mir', 'soilspectfm/utils.py')},
'soilspectfm.visualization': { 'soilspectfm.visualization.plot_spectra': ( 'visualization.html#plot_spectra',
'soilspectfm/visualization.py'),
'soilspectfm.visualization.plot_spectra_comparison': ( 'visualization.html#plot_spectra_comparison',
Expand Down
62 changes: 57 additions & 5 deletions soilspectfm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_core.ipynb.

# %% auto 0
__all__ = ['SNV', 'MSC', 'TakeDerivative', 'ToAbsorbance']
__all__ = ['SNV', 'MSC', 'TakeDerivative', 'ToAbsorbance', 'WaveletDenoise']

# %% ../nbs/00_core.ipynb 3
from fastcore.all import *
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from scipy.signal import savgol_filter
from typing import Callable
import pywt

# %% ../nbs/00_core.ipynb 8

# %% ../nbs/00_core.ipynb 7
class SNV(BaseEstimator, TransformerMixin):
"""Standard Normal Variate transformation with flexible centering and scaling.
Expand Down Expand Up @@ -41,7 +43,7 @@ def transform(self, X):
scale = self.scale_func(X - center, axis=1, keepdims=True) + self.eps
return (X - center) / scale

# %% ../nbs/00_core.ipynb 11
# %% ../nbs/00_core.ipynb 10
class MSC(BaseEstimator, TransformerMixin):
"Multiplicative Scatter Correction with fastai-style implementation"
def __init__(self,
Expand Down Expand Up @@ -73,7 +75,7 @@ def transform(self, X: np.ndarray):
if self.reference_ is None: raise ValueError("MSC not fitted. Call 'fit' first.")
return np.array(parallel(self._transform_single, X, n_workers=self.n_jobs))

# %% ../nbs/00_core.ipynb 17
# %% ../nbs/00_core.ipynb 16
class TakeDerivative(BaseEstimator, TransformerMixin):
"Creates scikit-learn derivation + savitsky-golay smoothing custom transformer"
def __init__(self,
Expand All @@ -91,11 +93,61 @@ def fit(self, X, y=None):
def transform(self, X, y=None):
return savgol_filter(X, self.window_length, self.polyorder, self.deriv)

# %% ../nbs/00_core.ipynb 22
# %% ../nbs/00_core.ipynb 21
class ToAbsorbance(BaseEstimator, TransformerMixin):
"Creates scikit-learn transformer to transform reflectance to absorbance"
def __init__(self,
eps: float=1e-5 # Small value to avoid log(0)
): self.eps = eps
def fit(self, X, y=None): return self
def transform(self, X, y=None): return -np.log10(np.clip(X, self.eps, 1))

# %% ../nbs/00_core.ipynb 22
class WaveletDenoise(BaseEstimator, TransformerMixin):
"Wavelet denoising transformer compatible with scikit-learn."
def __init__(self,
wavelet:str='db6', # Wavelet to use for decomposition
level:Optional[int]=None, # Decomposition level. If None, maximum level is used
threshold_mode:str='soft' # Thresholding mode ('soft'/'hard')
):
store_attr()
self.threshold_mode = threshold_mode

def _denoise_single(self, spectrum):
"Denoise a single spectrum"
# If level is None, calculate maximum possible level
if self.level is None:
self.level_ = pywt.dwt_max_level(len(spectrum),
pywt.Wavelet(self.wavelet).dec_len)
else:
self.level_ = self.level

coeffs = pywt.wavedec(spectrum, self.wavelet, level=self.level_)

# Calculate threshold using MAD estimator
detail_coeffs = np.concatenate([c for c in coeffs[1:]])
sigma = np.median(np.abs(detail_coeffs)) / 0.6745
threshold = sigma * np.sqrt(2 * np.log(len(spectrum)))

# Apply threshold to detail coefficients
new_coeffs = list(coeffs)
for i in range(1, len(coeffs)):
new_coeffs[i] = pywt.threshold(coeffs[i],
threshold * (1/2**((self.level_-i)/2)),
mode=self.threshold_mode)

denoised = pywt.waverec(new_coeffs, self.wavelet)

# Ensure output length matches input length
return denoised[:len(spectrum)]

def fit(self, X, y=None):
"Fit the transformer (no-op)"
return self

def transform(self, X):
"Apply wavelet denoising to spectra."
X = np.asarray(X)
X_denoised = np.zeros_like(X)
for i in range(X.shape[0]): X_denoised[i] = self._denoise_single(X[i])
return X_denoised
16 changes: 15 additions & 1 deletion soilspectfm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/02.utils.ipynb.

# %% auto 0
__all__ = ['toy_mir_url', 'load_toy_mir']
__all__ = ['toy_mir_url', 'toy_noisy_mir_url', 'load_toy_mir', 'load_toy_noisy_mir']

# %% ../nbs/02.utils.ipynb 3
toy_mir_url = 'https://gist.githubusercontent.com/franckalbinet/a7476d0413e88bcc162c553e43f182fa/raw/a45dc573eec558b019909bdd1830fbb207511fcb/mir-spectra-sample.csv'
toy_noisy_mir_url = 'https://gist.githubusercontent.com/franckalbinet/3e4e16f592175edad724c22841eb88dd/raw/c6a3f83c0146fbdc826aa4e9c1c448120a281a2e/rt_mir_bruker.csv'

# %% ../nbs/02.utils.ipynb 4
import pandas as pd
Expand All @@ -18,3 +19,16 @@ def load_toy_mir():
ws = df_mir.columns.astype(int).to_numpy()
X = df_mir.values
return X, ws

# %% ../nbs/02.utils.ipynb 7
def load_toy_noisy_mir():
"""
Load the toy noisy MIR dataset.
Scans acquired in the context of a K spiking experiment: In-progress publication.
"""
df = pd.read_csv(toy_noisy_mir_url)
sample_ids = df.sample_id.values
wavenumbers = df.columns[1:].to_numpy().astype(float)
spectra = df.iloc[:, 1:].to_numpy()
return spectra, wavenumbers, sample_ids

0 comments on commit efa8ec7

Please sign in to comment.