Skip to content

Commit

Permalink
Another refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
ypriverol committed Sep 23, 2024
1 parent 5e56b21 commit 9b74ada
Show file tree
Hide file tree
Showing 9 changed files with 15 additions and 39 deletions.
2 changes: 2 additions & 0 deletions fslite/fs/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def is_valid_univariate_method(method_name: str) -> bool:
return True
return False


def is_valid_multivariate_method(method_name: str) -> bool:
"""
This method checks whether the given method name is a supported multivariate method
Expand All @@ -112,6 +113,7 @@ def is_valid_multivariate_method(method_name: str) -> bool:
return True
return False


def is_valid_ml_method(method_name: str) -> bool:
"""
This method checks whether the given method name is a supported machine learning method
Expand Down
4 changes: 2 additions & 2 deletions fslite/fs/methods.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from abc import ABC, abstractmethod
from typing import List, Type, Union, Optional, Dict, Any
from typing import List

from fslite.fs.constants import get_fs_method_details
from fslite.fs.fdataframe import FSDataFrame



class FSMethod(ABC):
"""
Feature selection abstract class. This class defines the basic structure of a feature selection method.
Expand Down Expand Up @@ -60,6 +59,7 @@ def set_params(self, **kwargs):
"""
self.kwargs.update(kwargs)


# class FSPipeline:
# """
# The FSPipeline class creates a pipeline of feature selection methods. It provides a way to
Expand Down
6 changes: 0 additions & 6 deletions fslite/fs/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@
"""

import warnings
from typing import List, Any, Dict, Optional, Union

import pandas as pd
from fslite.fs.constants import get_fs_ml_methods, is_valid_ml_method
from fslite.fs.fdataframe import FSDataFrame
from fslite.fs.methods import FSMethod, InvalidMethodError
Expand Down Expand Up @@ -243,8 +239,6 @@ def __repr__(self):
return self.__str__()




#
#
# # Define an abstract class that allows creating a factory of models
Expand Down
29 changes: 4 additions & 25 deletions fslite/fs/multivariate.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,16 @@
# import logging
# from typing import List
#
# import numpy as np
# import pyspark
# from pyspark.ml.feature import VarianceThresholdSelector
# from pyspark.ml.stat import Correlation
#
# from fslite.fs.constants import (
# MULTIVARIATE_METHODS,
# MULTIVARIATE_CORRELATION,
# MULTIVARIATE_VARIANCE,
# )
#
# from fslite.fs.core import FSDataFrame
# from fslite.fs.utils import find_maximal_independent_set
# from fslite.utils.generic import tag
#
# logging.basicConfig(format="%(levelname)s (%(name)s %(lineno)s): %(message)s")
# logger = logging.getLogger("FSSPARK:MULTIVARIATE")
# logger.setLevel(logging.INFO)
#
import logging
from typing import List

import numpy as np
from scipy.stats import spearmanr

from fslite.fs.constants import get_fs_multivariate_methods
from fslite.fs.constants import get_fs_multivariate_methods, is_valid_multivariate_method
from fslite.fs.fdataframe import FSDataFrame
from fslite.fs.methods import FSMethod, InvalidMethodError
from fslite.fs.utils import find_maximal_independent_set

logging.basicConfig(format="%(levelname)s (%(name)s %(lineno)s): %(message)s")
logger = logging.getLogger("FS:UNIVARIATE")
logger = logging.getLogger("FS:MULTIVARIATE")
logger.setLevel(logging.INFO)

class FSMultivariate(FSMethod):
Expand Down Expand Up @@ -102,7 +80,8 @@ def multivariate_filter(
if multivariate_method == "m_corr":
selected_features = multivariate_correlation_selector(fsdf, **kwargs)
elif multivariate_method == "variance":
selected_features = multivariate_variance_selector(fsdf, **kwargs)
# selected_features = multivariate_variance_selector(fsdf, **kwargs)
logging.info("Variance method not implemented yet.")
else:
raise ValueError(
f"Invalid multivariate method: {multivariate_method}. "
Expand Down
1 change: 1 addition & 0 deletions fslite/pipeline/fs_pipeline_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from fslite.config.context import init_spark, stop_spark_session
from fslite.fs.core import FSDataFrame

from fslite.fs.methods import FSPipeline, FSUnivariate, FSMultivariate, FSMLMethod
from fslite.utils.datasets import get_tnbc_data_path
from fslite.utils.io import import_table_as_psdf
Expand Down
2 changes: 1 addition & 1 deletion fslite/tests/generate_big_tests.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging

import pandas as pd
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

Expand Down
5 changes: 3 additions & 2 deletions fslite/tests/test_fsdataframe.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import gc

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from memory_profiler import memory_usage
import gc

from fslite.fs.fdataframe import FSDataFrame

Expand Down
4 changes: 2 additions & 2 deletions fslite/tests/test_univariate_methods.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import pandas as pd
from fslite.utils.datasets import get_tnbc_data_path
from fslite.fs.fdataframe import FSDataFrame

from fslite.fs.fdataframe import FSDataFrame
from fslite.fs.univariate import FSUnivariate
from fslite.utils.datasets import get_tnbc_data_path


def test_univariate_filter_corr():
Expand Down
1 change: 0 additions & 1 deletion fslite/utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import pyspark.pandas
import pyspark.sql

from fslite.config.context import PANDAS_ON_SPARK_API_SETTINGS

warnings.filterwarnings("ignore")
Expand Down

0 comments on commit 9b74ada

Please sign in to comment.