Commit

minor changes
saanikat committed Sep 16, 2024
1 parent 55d9a14 commit 8d8b1a6
Showing 4 changed files with 13 additions and 11 deletions.
1 change: 1 addition & 0 deletions bedms/__init__.py
@@ -1,4 +1,5 @@
"""
This module initializes 'bedms' package.
"""
+
from .attr_standardizer import AttrStandardizer
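
Since AttrStandardizer is re-exported at the package level, it can be imported straight from bedms. A minimal usage sketch; the constructor argument is an assumption based on the schema names mentioned in attr_standardizer.py below, not something this diff shows:

from bedms import AttrStandardizer

# Hypothetical usage: the schema argument is assumed, not verified against the API.
standardizer = AttrStandardizer("ENCODE")
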
2 changes: 1 addition & 1 deletion bedms/attr_standardizer.py
@@ -100,7 +100,7 @@ def _get_parameters(self) -> Tuple[int, int, int, int, int, float]:
"Presently, three schemas are available: ENCODE , FAIRTRACKS, BEDBASE"
)

-def _load_model(self) -> tuple[nn.Module, object, object]:
+def _load_model(self) -> Tuple[nn.Module, object, object]:
"""
Calls function to load the model from HuggingFace repository
load vectorizer and label encoder and sets to eval().
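
The change above replaces the builtin generic tuple[...] with typing.Tuple[...], matching the Tuple annotation already used by _get_parameters. This is more than a style fix: subscripting the builtin tuple in annotations requires Python 3.9+, while typing.Tuple also works on older interpreters. A standalone sketch of the two styles (not bedms code):

from typing import Tuple

def builtin_generic() -> tuple[int, str]:  # defining this requires Python >= 3.9
    return 1, "a"

def typing_generic() -> Tuple[int, str]:   # also works on older Python versions
    return 1, "a"
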
4 changes: 2 additions & 2 deletions bedms/model.py
@@ -20,9 +20,9 @@ def __init__(
Initializes the BoWSTModel.
:param int input_size_values: Size of the input for the values (BoW).
-:param int inout_size_values_embeddings: Size of the input
+:param int inout_size_values_embeddings: Size of the input
for the values sentence transformer embeddings.
-:param int input_size_headers: Size of the input
+:param int input_size_headers: Size of the input
for the headers with sentence transformer embeddings.
:param int hidden_size: Size of the hidden layer.
:param int output_size: Size of the output layer.
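
For orientation, a hypothetical construction of BoWSTModel following the parameter names in the docstring above. The exact signature, any extra arguments such as a dropout rate, and all sizes here are assumptions; the docstring's "inout_size_values_embeddings" is read as "input_size_values_embeddings":

from bedms.model import BoWSTModel

# All names and sizes below are illustrative assumptions, not a verified signature.
model = BoWSTModel(
    input_size_values=10000,           # Bag-of-Words vocabulary size
    input_size_values_embeddings=384,  # sentence-transformer dim for values
    input_size_headers=384,            # sentence-transformer dim for headers
    hidden_size=256,
    output_size=20,                    # number of schema attributes to predict
)
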
17 changes: 9 additions & 8 deletions bedms/utils.py
@@ -1,6 +1,7 @@
"""
This module has all util functions for 'bedms'
"""
+
import warnings
from collections import Counter
from typing import Any, List, Optional, Tuple, Union
@@ -21,7 +22,7 @@
MODEL_FAIRTRACKS,
NUM_CLUSTERS,
REPO_ID,
-PEP_FILE_TYPES
+PEP_FILE_TYPES,
)

# TODO : convert to single np array before converting to tensor
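
The TODO above refers to a real PyTorch performance detail: building a tensor from a Python list of NumPy arrays is much slower (and warns in recent PyTorch releases) than stacking into a single array first. A small standalone illustration:

import numpy as np
import torch

embeddings = [np.random.rand(384).astype(np.float32) for _ in range(1000)]

fast = torch.tensor(np.stack(embeddings))  # single ndarray -> tensor (preferred)
slow = torch.tensor(embeddings)            # list of ndarrays -> tensor (slower, warns)
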
@@ -69,10 +70,10 @@ def data_preprocessing(
:param pd.DataFrame df: The input DataFrame (user chosen PEP) to preprocess.
:return Tuple[List[List[str]], List[str], List[List[str]]]:
-- Nested list containing the comma separated values
+- Nested list containing the comma separated values
in each column for sentence transformer embeddings.
- List containing the headers of the DataFrame.
-- Nested list containing the comma separated values
+- Nested list containing the comma separated values
in each column for Bag of Words encoding.
- Number of rows in the metadata csv
"""
@@ -163,15 +164,15 @@ def data_encoding(
:param object vectorizer: scikit-learn vectorizer for bag of words encoding.
:param object label_encoder" Label encoder object storing labels (y)
:param int num_rows: Number of rows in the sample metadata
-:param list X_values_st: Nested list containing the comma separated values
+:param list X_values_st: Nested list containing the comma separated values
in each column for sentence transformer embeddings.
:param list X_headers_st: List containing the headers of the DataFrame.
-:param list X_values_bow: Nested list containing the comma separated values
+:param list X_values_bow: Nested list containing the comma separated values
in each column for Bag of Words encoding.
:param str schema: Schema type chosen by the user for standardization.
-:return Tuple[torch.Tensor, torch.Tensor, torch.Tensor,
-Union[LabelEncoder, None]]: Tuple containing
-torch tensors for encoded embeddings and Bag of Words representations,
+:return Tuple[torch.Tensor, torch.Tensor, torch.Tensor,
+Union[LabelEncoder, None]]: Tuple containing
+torch tensors for encoded embeddings and Bag of Words representations,
and label encoder object.
"""
# Sentence Transformer Model
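
As a rough, self-contained sketch of the encoding step the docstring above describes: sentence-transformer embeddings for values and headers, a Bag of Words matrix from a scikit-learn vectorizer, and a label encoder for the targets, all as torch tensors. The model name, the shapes, and the way columns are joined are assumptions for illustration, not the bedms implementation:

import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder

X_values_st = [["liver", "kidney"], ["ATAC-seq", "RNA-seq"]]   # toy values per column
X_headers_st = ["tissue", "assay"]                             # toy headers
X_values_bow = [["liver", "kidney"], ["ATAC-seq", "RNA-seq"]]  # same values for BoW

st_model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model

# One embedding per column: join each column's values into a single string first.
values_emb = torch.tensor(np.stack([st_model.encode(" ".join(col)) for col in X_values_st]))
headers_emb = torch.tensor(np.stack([st_model.encode(h) for h in X_headers_st]))

vectorizer = CountVectorizer()
bow = torch.tensor(
    vectorizer.fit_transform([" ".join(col) for col in X_values_bow]).toarray(),
    dtype=torch.float32,
)

label_encoder = LabelEncoder()  # would carry the target labels (y) during training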
