Skip to content

Commit

Permalink
Merge pull request #44 from f-aguzzi/pre/beta
Browse files Browse the repository at this point in the history
Pre/beta into main: release 2.5.0
  • Loading branch information
f-aguzzi authored Jun 13, 2024
2 parents 21fcb01 + 78d59e4 commit 7ce6c00
Show file tree
Hide file tree
Showing 99 changed files with 50,443 additions and 706 deletions.
53 changes: 53 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,56 @@
## [2.5.0-beta.6](https://github.com/f-aguzzi/tesi/compare/v2.5.0-beta.5...v2.5.0-beta.6) (2024-06-12)


### Bug Fixes

* **pca:** field and property issues ([91cefd3](https://github.com/f-aguzzi/tesi/commit/91cefd3d66475d13ed990ed2911014cc866e2f8d))

## [2.5.0-beta.5](https://github.com/f-aguzzi/tesi/compare/v2.5.0-beta.4...v2.5.0-beta.5) (2024-06-12)


### Bug Fixes

* **BaseActionClass:** trailing commas creating unwanted tuple ([1b057ab](https://github.com/f-aguzzi/tesi/commit/1b057abb23710c0aef8f67875411d63e47a61036))

## [2.5.0-beta.4](https://github.com/f-aguzzi/tesi/compare/v2.5.0-beta.3...v2.5.0-beta.4) (2024-06-12)


### Bug Fixes

* **base:** third round of inheritance fixes ([c3b1bbb](https://github.com/f-aguzzi/tesi/commit/c3b1bbb36c7c150525a6d9d0c87f863291ad397e))

## [2.5.0-beta.3](https://github.com/f-aguzzi/tesi/compare/v2.5.0-beta.2...v2.5.0-beta.3) (2024-06-12)


### Bug Fixes

* **base:** inheritance settings (2) ([103040c](https://github.com/f-aguzzi/tesi/commit/103040c940ba884debaf8fdc2239d3dee0e76ab2))

## [2.5.0-beta.2](https://github.com/f-aguzzi/tesi/compare/v2.5.0-beta.1...v2.5.0-beta.2) (2024-06-12)


### Bug Fixes

* **base:** settings inheritance ([8a14a00](https://github.com/f-aguzzi/tesi/commit/8a14a0036bf3791deda182ec0f3aa618806f180d))

## [2.5.0-beta.1](https://github.com/f-aguzzi/tesi/compare/v2.4.0...v2.5.0-beta.1) (2024-06-12)


### Features

* **base:** added BaseActionClass as root class ([4f0bac8](https://github.com/f-aguzzi/tesi/commit/4f0bac831f6ca215212f6c801b01483ef2d0e5cc))
* LDA and PCA inherit from BaseReducer ([7b797bd](https://github.com/f-aguzzi/tesi/commit/7b797bdfb00404b90fdeaf4178085f2d2bdb9c34))


### Chore

* **docs:** upgrade to version 2.5.0 ([387499f](https://github.com/f-aguzzi/tesi/commit/387499f5add4ceae62d07891ffd531406607cdaf))


### Docs

* finish first case study ([21fcb01](https://github.com/f-aguzzi/tesi/commit/21fcb014b8b510fe7718e1443a1daad0afd09e0e))

## [2.4.0](https://github.com/f-aguzzi/tesi/compare/v2.3.0...v2.4.0) (2024-06-11)


Expand Down
97 changes: 75 additions & 22 deletions chemfusekit/__base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
'''A base class for all classifiers.'''
"""A base class for all classifiers."""
from abc import ABC, abstractmethod

import pandas as pd
import numpy as np
Expand All @@ -10,7 +11,7 @@


class BaseDataModel:
'''Models the output data from data-outputting operations'''
"""Models the output data from data-outputting operations"""

def __init__(self, x_data: pd.DataFrame, x_train: pd.DataFrame, y: np.ndarray):
self.x_data = x_data
Expand Down Expand Up @@ -83,45 +84,39 @@ def export_to_file(self, export_path: str, sheet_name: str = 'Sheet1'):
raise ValueError(f"Unsupported file format: {export_path}")

def __getitem__(self, index):
'''Get an item with array-style indexing'''
"""Get an item with array-style indexing"""
return pd.DataFrame(self.x_data.iloc[index, :]).transpose()


class BaseSettings:
    """Holds the settings for all objects with settings."""

    def __init__(self, output: GraphMode = GraphMode.NONE):
        """Store the output mode.

        Args:
            output: the ``GraphMode`` to use for output; defaults to
                ``GraphMode.NONE``.
        """
        self.output = output


class BaseClassifierSettings(BaseSettings):
    """Holds the settings for the BaseClassifier object."""

    def __init__(self, output: GraphMode = GraphMode.NONE, test_split: bool = False):
        """Validate and store the classifier settings.

        Args:
            output: the output mode, forwarded to ``BaseSettings``.
            test_split: whether a split test was requested.

        Raises:
            Warning: if ``test_split`` is True while ``output`` is
                ``GraphMode.NONE``. Note that the warning is *raised* as an
                exception, not emitted through the ``warnings`` module.
        """
        # BaseSettings.__init__ already stores `output`; no need to assign it again.
        super().__init__(output)
        if test_split is True and output is GraphMode.NONE:
            raise Warning(
                "You selected test_split but it won't run because you disabled the output."
            )
        self.test_split = test_split


class BaseClassifier:
'''Parent class for all classifiers, containing basic shared utilities.'''

class BaseActionClass(ABC):
"""Abstract base class for all reducers and classifiers."""
def __init__(self, settings: BaseSettings, data: BaseDataModel):
    """Keep the settings and data, and start without a trained model.

    Args:
        settings: the settings object for this action.
        data: the data model this action operates on.
    """
    # No estimator exists until one is trained or imported from file.
    self.model: BaseEstimator | None = None
    self.settings, self.data = settings, data

def import_model(self, import_path: str):
model = joblib.load(import_path)
if not isinstance(model, BaseEstimator):
raise ImportError("The file you tried importing is not a sklearn model!")
self.model = model

def export_model(self, export_path: str):
if self.model is not None:
joblib.dump(self.model, export_path)
else:
raise RuntimeError("You haven't trained the model yet! You cannot export it now.")

@classmethod
def from_file(cls, settings, model_path):
'''Creates a classifier instance from file'''
"""Creates a classifier instance from file"""
x_data = pd.DataFrame()
y_dataframe = pd.DataFrame(columns=['Substance'])
x_train = pd.concat([y_dataframe, x_data], axis=1)
Expand All @@ -134,13 +129,71 @@ def from_file(cls, settings, model_path):
class_instance = cls(settings, data)
class_instance.import_model(model_path)
return class_instance

def import_model(self, import_path: str):
    """Load a sklearn model from ``import_path`` and store it in ``self.model``.

    NOTE(review): ``joblib.load`` is pickle-based, so only files from a
    trusted source should be imported.

    Raises:
        ImportError: if the file cannot be loaded at all, or if the loaded
            object is not a ``BaseEstimator``.
    """
    try:
        loaded = joblib.load(import_path)
    except Exception as load_error:
        # Any loading failure is reported uniformly, keeping the original cause chained.
        raise ImportError("The file you tried importing is not a valid Python object!") from load_error

    if isinstance(loaded, BaseEstimator):
        self.model = loaded
        return
    raise ImportError("The file you tried importing is not a sklearn model!")

def export_model(self, export_path: str):
    """Dump the underlying sklearn model to ``export_path`` with joblib.

    Raises:
        RuntimeError: if ``self.model`` is ``None`` (nothing trained or
            imported yet).
    """
    # Guard first: there is nothing to write without a model.
    if self.model is None:
        raise RuntimeError("You haven't trained the model yet! You cannot export it now.")
    joblib.dump(self.model, export_path)


class BaseClassifier(BaseActionClass):
    """Parent class for all classifiers, containing basic shared utilities."""

    def __init__(self, settings: BaseClassifierSettings, data: BaseDataModel):
        super().__init__(settings, data)

    def predict(self, x_data: pd.DataFrame):
        """Run the trained model's ``predict`` on ``x_data`` and return its output.

        Raises:
            TypeError: if ``x_data`` is ``None``.
            RuntimeError: if no model has been trained or imported yet.
        """
        # The concrete subclass name is used in both error messages.
        owner = self.__class__.__name__
        if x_data is None:
            raise TypeError(f"X data for {owner} prediction must be non-empty.")
        if self.model is None:
            raise RuntimeError(f"The {owner} model is not trained yet!")
        return self.model.predict(x_data)


class BaseReducer(BaseActionClass):
    """Parent class for all reducers (decomposition-performing classes), containing basic shared utilities."""

    def __init__(self, settings: BaseSettings, data: BaseDataModel):
        super().__init__(settings, data)

    @abstractmethod
    def export_data(self) -> BaseDataModel:
        """Return the reducer's output as a data model (implemented by subclasses)."""

    def reduce(self, data: BaseDataModel) -> BaseDataModel:
        """Reduces dimensionality of data.

        Applies the trained model's ``transform`` to ``data.x_data`` and
        returns a new data model whose ``x_train`` holds the labels followed
        by the transformed columns.

        Raises:
            RuntimeError: if no model has been trained or imported yet.
        """
        if self.model is None:
            raise RuntimeError(
                "The model hasn't been trained yet! You cannot use it to reduce data dimensionality."
            )
        x_data = pd.DataFrame(self.model.transform(data.x_data))
        # Name the label column explicitly: an unnamed frame would get the
        # default label 0, which collides with the first transformed column
        # once both are concatenated side by side.
        y_dataframe = pd.DataFrame(data.y, columns=['Substance'])
        x_train = pd.concat([y_dataframe, x_data], axis=1)
        return BaseDataModel(x_data=x_data, x_train=x_train, y=data.y)

    @property
    @abstractmethod
    def rescaled_data(self) -> BaseDataModel:
        """Return the rescaled data as a data model (implemented by subclasses)."""
2 changes: 1 addition & 1 deletion chemfusekit/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
'''__init__.py file for the library'''
"""__init__.py file for the library"""
from beartype import BeartypeConf
from beartype.claw import beartype_this_package
beartype_this_package(conf=BeartypeConf(violation_type=TypeError))
38 changes: 20 additions & 18 deletions chemfusekit/__utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
'''Utilities module: functions that are shared between different classes'''
"""Utilities module: functions that are shared between different classes"""
from sklearn.cross_decomposition import PLSRegression as PLSR
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
Expand All @@ -16,13 +16,15 @@

from enum import Enum, auto


class GraphMode(Enum):
    """Output modes accepted by the reporting helpers in this module."""

    # presumably plain-text output; the text branches are not fully visible here
    TEXT = auto()
    # figures and tables rendered graphically
    GRAPHIC = auto()
    # output disabled: the helpers return early
    NONE = auto()


def graph_output(scores, model, name: str, mode: GraphMode = GraphMode.GRAPHIC):
'''A reusable graphing function.'''
"""A reusable graphing function."""
# Return early if output is disabled
if mode is GraphMode.NONE:
return
Expand All @@ -38,7 +40,7 @@ def graph_output(scores, model, name: str, mode: GraphMode = GraphMode.GRAPHIC):

# Display the explained variance ratio
print("Explained Variance Ratio:", model.explained_variance_ratio_)

if mode is GraphMode.GRAPHIC:
# Scores table
print_table(
Expand All @@ -47,7 +49,7 @@ def graph_output(scores, model, name: str, mode: GraphMode = GraphMode.GRAPHIC):
f"{name} Scores"
)

#Scores plot
# Scores plot
fig = px.scatter(scores, x="LV1", y="LV2", color="Substance", hover_data=['Substance'])
fig.update_xaxes(zeroline=True, zerolinewidth=1, zerolinecolor='Black')
fig.update_yaxes(zeroline=True, zerolinewidth=1, zerolinecolor='Black')
Expand All @@ -61,25 +63,25 @@ def graph_output(scores, model, name: str, mode: GraphMode = GraphMode.GRAPHIC):
fig = px.scatter_3d(scores, x='LV1', y='LV2', z='LV3',
color='Substance', hover_data=['Substance'],
hover_name=scores.index
)
)
fig.update_layout(title_text=f"3D colored by Substance for {name}")
fig.show()


def print_table(header_values, cell_values, title: str, mode: GraphMode = GraphMode.GRAPHIC):
'''Multimodal table printing utility.'''
"""Multimodal table printing utility."""
# Return early if output is disabled
if mode is GraphMode.NONE:
return
# Graphical table printing
elif mode is GraphMode.GRAPHIC:
fig = go.Figure(data=[go.Table(
header=dict(values=header_values,
fill_color='paleturquoise',
align='left'),
cells=dict(values=cell_values,
fill_color='lavender',
align='left'))
header=dict(values=header_values,
fill_color='paleturquoise',
align='left'),
cells=dict(values=cell_values,
fill_color='lavender',
align='left'))
])
fig.update_layout(title=title)
fig.show()
Expand All @@ -94,7 +96,7 @@ def print_table(header_values, cell_values, title: str, mode: GraphMode = GraphM


def run_split_test(x, y, model, extended=False, mode: GraphMode = GraphMode.GRAPHIC):
'''A function to run split tests on trained models.'''
"""A function to run split tests on trained models."""

# Return early if there's nothing to print
if mode is GraphMode.NONE:
Expand All @@ -121,11 +123,11 @@ def run_split_test(x, y, model, extended=False, mode: GraphMode = GraphMode.GRAP

# See the classes the model used
classes = model.classes_
if isinstance(model, LogisticRegression) and len(classes) == 2: # Binary classifier
if isinstance(model, LogisticRegression) and len(classes) == 2: # Binary classifier
classes = [" / ".join(classes)]
classes = np.asarray(classes)
classes = classes.reshape((1, 1))
else: # All other cases
else: # All other cases
classes = classes.reshape((1, len(classes)))

coefficients = model.coef_.transpose()
Expand Down Expand Up @@ -192,7 +194,7 @@ def run_split_test(x, y, model, extended=False, mode: GraphMode = GraphMode.GRAP


def print_confusion_matrix(y1, y2, title: str, mode: GraphMode = GraphMode.GRAPHIC):
'''Function to simplify the plotting of confusion matrices'''
"""Function to simplify the plotting of confusion matrices"""

# Return early if there's nothing to print
if mode is GraphMode.NONE:
Expand All @@ -202,7 +204,7 @@ def print_confusion_matrix(y1, y2, title: str, mode: GraphMode = GraphMode.GRAPH

# Get unique class labels from y_true
class_labels = sorted(set(y2))

# Create the report
report = classification_report(y1, y2, digits=2, output_dict=True)

Expand Down Expand Up @@ -236,5 +238,5 @@ def print_confusion_matrix(y1, y2, title: str, mode: GraphMode = GraphMode.GRAPH
['substance'] + list(cr.columns),
[cr.index, cr['precision'], cr['recall'], cr['f1-score'], cr['support']],
"Classification Report",
mode
mode
)
24 changes: 10 additions & 14 deletions chemfusekit/knn.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
'''k-Nearest Neighbors Analysis module'''
"""k-Nearest Neighbors Analysis module"""
from copy import copy
from typing import Optional
from beartype.typing import Callable

from sklearn.neighbors import KNeighborsClassifier

import pandas as pd

from chemfusekit.lldf import LLDFDataModel
from chemfusekit.__utils import run_split_test, print_confusion_matrix, print_table, GraphMode
from .__base import BaseSettings, BaseClassifier
from .__base import BaseClassifierSettings, BaseClassifier, BaseDataModel


class KNNSettings(BaseSettings):
'''Holds the settings for the kNN object.'''
class KNNSettings(BaseClassifierSettings):
"""Holds the settings for the kNN object."""
def __init__(self, n_neighbors: int = 15, metric: str | Callable = 'euclidean', weights: str | Callable = 'uniform',
algorithm: str = 'auto', output: GraphMode = GraphMode.NONE, test_split: bool = False):

Expand All @@ -28,7 +24,7 @@ def __init__(self, n_neighbors: int = 15, metric: str | Callable = 'euclidean',
if weights not in ['uniform', 'distance'] and not callable(weights):
raise ValueError("Invalid weight: should be 'uniform', 'distance' or a callable")
if algorithm not in ['auto', 'ball_tree', 'kd_tree', 'brute']:
raise ValueError(
raise ValueError(
"Invalid algorithm: should be 'auto', 'ball_tree', 'kd_tree' or 'brute'."
)
self.n_neighbors = n_neighbors
Expand All @@ -38,12 +34,12 @@ def __init__(self, n_neighbors: int = 15, metric: str | Callable = 'euclidean',


class KNN(BaseClassifier):
'''Class to store the data, methods and artifacts for k-Nearest Neighbors Analysis'''
def __init__(self, settings: KNNSettings, fused_data: LLDFDataModel):
super().__init__(settings, fused_data)
"""Class to store the data, methods and artifacts for k-Nearest Neighbors Analysis"""
def __init__(self, settings: KNNSettings, data: BaseDataModel):
super().__init__(settings, data)

def knn(self):
'''Performs k-Nearest Neighbors Analysis'''
"""Performs k-Nearest Neighbors Analysis"""
# Prepare and train the kNN model
knn = KNeighborsClassifier(
n_neighbors=self.settings.n_neighbors,
Expand All @@ -60,7 +56,7 @@ def knn(self):
y_pred = knn.predict(self.data.x_data)
print_table(
["Predictions"],
y_pred.reshape(1,len(y_pred)),
y_pred.reshape(1, len(y_pred)),
"Data and predictions",
self.settings.output
)
Expand Down
Loading

0 comments on commit 7ce6c00

Please sign in to comment.