Skip to content

Commit

Permalink
add Resample and SavGolSmoothing preprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
franckalbinet committed Jan 23, 2025
1 parent efa8ec7 commit a5242e6
Show file tree
Hide file tree
Showing 8 changed files with 364 additions and 98 deletions.
23 changes: 17 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ pip install soilspectfm
## Quick Start

``` python
from soilspectfm.core import SNV, TakeDerivative, ToAbsorbance
from soilspectfm.core import (SNV,
TakeDerivative,
ToAbsorbance,
Resample,
WaveletDenoise)

from sklearn.pipeline import Pipeline
```

Expand Down Expand Up @@ -50,30 +55,35 @@ Implemented transforms developed so far include:
- [x]
[`MSC`](https://franckalbinet.github.io/soilspectfm/core.html#msc):
Multiplicative Scatter Correction
- [ ] `Detrend`: Detrend the spectrum (SOON)
- [ ] `ALS`: Asymmetric Least Squares detrend the spectrum (SOON)
- [ ] `Detrend`: Detrend the spectrum (coming soon …)
- [ ] `ALS`: Asymmetric Least Squares detrend the spectrum (coming
soon …)

- **Derivatives**:

- [x]
[`TakeDerivative`](https://franckalbinet.github.io/soilspectfm/core.html#takederivative):
Take derivative (1st, 2nd, etc.) of the spectrum and apply
Savitzky-Golay smoothing
- [ ] `GapSegmentDerivative`:
- [ ] `GapSegmentDerivative`: Gap-segment derivative of the spectrum (coming soon …)

- **Smoothing**:

- [x]
[`WaveletDenoise`](https://franckalbinet.github.io/soilspectfm/core.html#waveletdenoise):
Wavelet denoising
- [ ] `SavGolSmooth`: Savitzky-Golay smoothing
- [x]
[`SavGolSmooth`](https://franckalbinet.github.io/soilspectfm/core.html#savgolsmooth):
Savitzky-Golay smoothing

- **Other transformations**:

- [x]
[`ToAbsorbance`](https://franckalbinet.github.io/soilspectfm/core.html#toabsorbance):
Transform the spectrum to absorbance
- [ ] `Resample`: Resample the spectrum to a new wavenumber range
- [x]
[`Resample`](https://franckalbinet.github.io/soilspectfm/core.html#resample):
Resample the spectrum to a new wavenumber range

Transforms are fully compatible with
[scikit-learn](https://scikit-learn.org/stable/) and can be used in a
Expand All @@ -82,6 +92,7 @@ pipeline as follows:
``` python
pipe = Pipeline([
('snv', SNV()), # Standard Normal Variate transformation
('denoise', WaveletDenoise()), # Wavelet denoising
('deriv', TakeDerivative(window_length=11, polyorder=2, deriv=1)) # First derivative
])

Expand Down
Binary file modified index_files/figure-commonmark/cell-7-output-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
241 changes: 204 additions & 37 deletions nbs/00_core.ipynb

Large diffs are not rendered by default.

21 changes: 13 additions & 8 deletions nbs/index.ipynb

Large diffs are not rendered by default.

35 changes: 15 additions & 20 deletions settings.ini
Original file line number Diff line number Diff line change
@@ -1,44 +1,39 @@
[DEFAULT]
# All sections below are required unless otherwise specified.
# See https://github.com/fastai/nbdev/blob/master/settings.ini for examples.

### Python library ###
repo = soilspectfm
lib_name = %(repo)s
version = 0.0.1
lib_name = soilspectfm
version = 0.0.2
min_python = 3.7
license = apache2
black_formatting = False

### nbdev ###
doc_path = _docs
lib_path = soilspectfm
nbs_path = nbs
recursive = True
tst_flags = notest
put_version_in_init = True

### Docs ###
branch = main
custom_sidebar = True
doc_host = https://%(user)s.github.io
doc_baseurl = /%(repo)s
git_url = https://github.com/%(user)s/%(repo)s
doc_host = https://franckalbinet.github.io
doc_baseurl = /soilspectfm
git_url = https://github.com/franckalbinet/soilspectfm
title = SoilSpecTfm

### PyPI ###
audience = Developers
author = Franck Albinet
author_email = franckalbinet@gmail.com
copyright = 2025 onwards, %(author)s
copyright = 2025 onwards, Franck Albinet
description = Soil infrared spectra preprocessing utilities
keywords = nbdev jupyter notebook python
language = English
status = 3
user = franckalbinet

### Optional ###
requirements = fastcore scikit-learn matplotlib PyWavelets
dev_requirements = soilspecdata nbdev ipykernel
# dev_requirements =
# console_scripts =
readme_nb = index.ipynb
allowed_metadata_keys =
allowed_cell_metadata_keys =
jupyter_hooks = False
clean_ids = True
clear_all = False
cell_number = True
skip_procs =

2 changes: 1 addition & 1 deletion soilspectfm/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.1"
__version__ = "0.0.2"
10 changes: 10 additions & 0 deletions soilspectfm/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,20 @@
'soilspectfm.core.MSC._transform_single': ('core.html#msc._transform_single', 'soilspectfm/core.py'),
'soilspectfm.core.MSC.fit': ('core.html#msc.fit', 'soilspectfm/core.py'),
'soilspectfm.core.MSC.transform': ('core.html#msc.transform', 'soilspectfm/core.py'),
'soilspectfm.core.Resample': ('core.html#resample', 'soilspectfm/core.py'),
'soilspectfm.core.Resample.__init__': ('core.html#resample.__init__', 'soilspectfm/core.py'),
'soilspectfm.core.Resample.fit': ('core.html#resample.fit', 'soilspectfm/core.py'),
'soilspectfm.core.Resample.transform': ('core.html#resample.transform', 'soilspectfm/core.py'),
'soilspectfm.core.SNV': ('core.html#snv', 'soilspectfm/core.py'),
'soilspectfm.core.SNV.__init__': ('core.html#snv.__init__', 'soilspectfm/core.py'),
'soilspectfm.core.SNV.fit': ('core.html#snv.fit', 'soilspectfm/core.py'),
'soilspectfm.core.SNV.transform': ('core.html#snv.transform', 'soilspectfm/core.py'),
'soilspectfm.core.SavGolSmooth': ('core.html#savgolsmooth', 'soilspectfm/core.py'),
'soilspectfm.core.SavGolSmooth.__init__': ('core.html#savgolsmooth.__init__', 'soilspectfm/core.py'),
'soilspectfm.core.SavGolSmooth._validate_params': ( 'core.html#savgolsmooth._validate_params',
'soilspectfm/core.py'),
'soilspectfm.core.SavGolSmooth.fit': ('core.html#savgolsmooth.fit', 'soilspectfm/core.py'),
'soilspectfm.core.SavGolSmooth.transform': ('core.html#savgolsmooth.transform', 'soilspectfm/core.py'),
'soilspectfm.core.TakeDerivative': ('core.html#takederivative', 'soilspectfm/core.py'),
'soilspectfm.core.TakeDerivative.__init__': ('core.html#takederivative.__init__', 'soilspectfm/core.py'),
'soilspectfm.core.TakeDerivative.fit': ('core.html#takederivative.fit', 'soilspectfm/core.py'),
Expand Down
130 changes: 104 additions & 26 deletions soilspectfm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,34 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_core.ipynb.

# %% auto 0
__all__ = ['SNV', 'MSC', 'TakeDerivative', 'ToAbsorbance', 'WaveletDenoise']
__all__ = ['SNV', 'MSC', 'TakeDerivative', 'WaveletDenoise', 'SavGolSmooth', 'ToAbsorbance', 'Resample']

# %% ../nbs/00_core.ipynb 3
from fastcore.all import *
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from scipy.signal import savgol_filter
from scipy.interpolate import CubicSpline
from typing import Callable
import pywt


# %% ../nbs/00_core.ipynb 7
class SNV(BaseEstimator, TransformerMixin):
"""Standard Normal Variate transformation with flexible centering and scaling.
Common centering functions:
- np.mean: Standard choice, sensitive to outliers
- np.median: Robust to outliers, slower computation
- np.min: Ensures positive values, sensitive to noise
- lambda x, **kw: 0: No centering, preserves absolute values
- np.mean: Standard choice, sensitive to outliers
- np.median: Robust to outliers, slower computation
- np.min: Ensures positive values, sensitive to noise
- lambda x, **kw: 0: No centering, preserves absolute values
Common scaling functions:
- np.std: Standard choice, assumes normal distribution
- lambda x, **kw: np.sqrt(np.mean(x**2, **kw)): RMS, good for baseline variations
- scipy.stats.iqr: Robust to outliers, ignores extreme peaks
- lambda x, **kw: np.max(x, **kw) - np.min(x, **kw): Preserves relative peaks
- lambda x, **kw: np.median(np.abs(x - np.median(x, **kw)), **kw): Most robust, slower
- np.std: Standard choice, assumes normal distribution
- lambda x, **kw: np.sqrt(np.mean(x**2, **kw)): RMS, good for baseline variations
- scipy.stats.iqr: Robust to outliers, ignores extreme peaks
- lambda x, **kw: np.max(x, **kw) - np.min(x, **kw): Preserves relative peaks
- lambda x, **kw: np.median(np.abs(x - np.median(x, **kw)), **kw): Most robust, slower
"""
def __init__(self,
center_func: Callable=np.mean, # Function to center the data
Expand All @@ -38,7 +39,9 @@ def __init__(self,
):
store_attr()
def fit(self, X, y=None): return self
def transform(self, X):
def transform(self,
X: np.ndarray # Spectral data to be transformed
) -> np.ndarray: # Transformed spectra
center = self.center_func(X, axis=1, keepdims=True)
scale = self.scale_func(X - center, axis=1, keepdims=True) + self.eps
return (X - center) / scale
Expand All @@ -65,12 +68,16 @@ def fit(self, X: np.ndarray, y=None):
self.reference_ = self._compute_reference(X)
return self

def _transform_single(self, x: np.ndarray):
def _transform_single(self,
x: np.ndarray # Spectral data to be transformed
) -> np.ndarray: # Transformed spectra
"Transform a single spectrum"
coef = np.polyfit(self.reference_, x, deg=1)
return (x - coef[1]) / coef[0]

def transform(self, X: np.ndarray):
def transform(self,
X: np.ndarray # Spectral data to be transformed
) -> np.ndarray: # Transformed spectra
"Apply MSC to the spectra"
if self.reference_ is None: raise ValueError("MSC not fitted. Call 'fit' first.")
return np.array(parallel(self._transform_single, X, n_workers=self.n_jobs))
Expand All @@ -94,15 +101,6 @@ def transform(self, X, y=None):
return savgol_filter(X, self.window_length, self.polyorder, self.deriv)

# %% ../nbs/00_core.ipynb 21
class ToAbsorbance(BaseEstimator, TransformerMixin):
"Creates scikit-learn transformer to transform reflectance to absorbance"
def __init__(self,
eps: float=1e-5 # Small value to avoid log(0)
): self.eps = eps
def fit(self, X, y=None): return self
def transform(self, X, y=None): return -np.log10(np.clip(X, self.eps, 1))

# %% ../nbs/00_core.ipynb 22
class WaveletDenoise(BaseEstimator, TransformerMixin):
"Wavelet denoising transformer compatible with scikit-learn."
def __init__(self,
Expand All @@ -111,7 +109,6 @@ def __init__(self,
threshold_mode:str='soft' # Thresholding mode ('soft'/'hard')
):
store_attr()
self.threshold_mode = threshold_mode

def _denoise_single(self, spectrum):
"Denoise a single spectrum"
Expand All @@ -137,8 +134,6 @@ def _denoise_single(self, spectrum):
mode=self.threshold_mode)

denoised = pywt.waverec(new_coeffs, self.wavelet)

# Ensure output length matches input length
return denoised[:len(spectrum)]

def fit(self, X, y=None):
Expand All @@ -151,3 +146,86 @@ def transform(self, X):
X_denoised = np.zeros_like(X)
for i in range(X.shape[0]): X_denoised[i] = self._denoise_single(X[i])
return X_denoised

# %% ../nbs/00_core.ipynb 23
class SavGolSmooth(BaseEstimator, TransformerMixin):
    "Savitzky-Golay smoothing transformer compatible with scikit-learn."
    def __init__(self,
                 window_length:int=15, # Window length (number of points) for the savgol filter
                 polyorder:int=3, # Polynomial order for the savgol filter
                 deriv:int=0 # Derivative order (0 = smoothing only)
                 ):
        store_attr()

    # NOTE(review): recent scikit-learn versions appear to define a private
    # `BaseEstimator._validate_params` as well; this method shadows it — confirm
    # that the override is intended.
    def _validate_params(self):
        "Validate parameters; raises `ValueError` when they are inconsistent."
        if self.window_length % 2 == 0:
            raise ValueError("window_length must be odd")
        if self.window_length <= self.polyorder:
            raise ValueError("window_length must be greater than polyorder")
        if self.deriv > self.polyorder:
            raise ValueError("deriv must be <= polyorder")

    def fit(self,
            X:np.ndarray, # Spectral data to be smoothed.
            y:Optional[np.ndarray]=None # Ignored
            ):
        "Validate parameters and fit the transformer (nothing is learned from `X`)."
        self._validate_params()
        return self

    def transform(self,
                  X: np.ndarray # Spectral data to be smoothed, one spectrum per row
                  ) -> np.ndarray: # Smoothed spectra
        "Apply Savitzky-Golay filter to spectra."
        X = np.asarray(X)
        # Empty batch: nothing to filter, hand back an empty array of the same shape.
        if X.shape[0] == 0: return np.zeros_like(X)
        # Filter every spectrum in a single call along the last axis. The output
        # dtype is left to `savgol_filter`: writing the result into a buffer
        # allocated with `np.zeros_like(X)` would truncate the smoothed values
        # whenever `X` holds integers.
        return savgol_filter(X,
                             window_length=self.window_length,
                             polyorder=self.polyorder,
                             deriv=self.deriv,
                             axis=-1)

# %% ../nbs/00_core.ipynb 26
class ToAbsorbance(BaseEstimator, TransformerMixin):
    "Scikit-learn transformer converting reflectance to absorbance (`-log10` of the clipped input)."
    def __init__(self,
                 eps: float=1e-5 # Lower clipping bound, avoids log(0)
                 ):
        self.eps = eps

    def fit(self, X, y=None):
        "Nothing to learn: return the transformer itself."
        return self

    def transform(self, X, y=None):
        "Clip `X` to `[eps, 1]` and return the negative base-10 logarithm."
        clipped = np.clip(X, self.eps, 1)
        return -np.log10(clipped)

# %% ../nbs/00_core.ipynb 27
class Resample(BaseEstimator, TransformerMixin):
    "Resampling transformer compatible with scikit-learn (cubic spline interpolation)."
    def __init__(self,
                 target_x: np.ndarray, # Target x-axis points (wavenumbers or wavelengths) for resampling
                 interpolation_kind: str='cubic' # Type of spline interpolation; stored but not read by `transform`, which always fits a cubic spline
                 ):
        store_attr()

    def fit(self,
            X: np.ndarray, # Spectral data to be resampled
            x: np.ndarray=None, # Original x-axis points (wavenumbers or wavelengths), one per column of `X`
            y: np.ndarray=None # Unused
            ):
        "Store the original x-axis; raises `ValueError` when `x` is not provided."
        # NOTE(review): `x` occupies the positional slot where scikit-learn passes `y`
        # (e.g. `fit_transform(X, y)`), so `x` is best passed by keyword — confirm
        # against the intended Pipeline usage.
        if x is None: raise ValueError("Original x-axis (wavenumbers or wavelengths) must be provided")
        self.original_x_ = np.asarray(x)
        return self

    def transform(self,
                  X: np.ndarray # Spectral data to be resampled, one spectrum per row
                  ) -> np.ndarray: # Resampled spectra, one row per spectrum and one column per target point
        "Resample spectra to new x-axis points."
        X = np.asarray(X)
        # Empty batch: nothing to interpolate.
        if X.shape[0] == 0: return np.zeros((0, len(self.target_x)))
        # `CubicSpline` requires a strictly increasing x-axis, whereas spectra are
        # often stored with a descending one. Sort the axis and reorder the columns
        # to match; this is the identity for an already increasing axis.
        original_x = np.asarray(self.original_x_)
        order = np.argsort(original_x)
        # One spline object interpolates every spectrum at once along axis 1.
        spline = CubicSpline(original_x[order], X[:, order], axis=1)
        return spline(self.target_x)

0 comments on commit a5242e6

Please sign in to comment.