Skip to content

Commit

Permalink
black
Browse files Browse the repository at this point in the history
  • Loading branch information
saanikat committed Oct 2, 2024
1 parent 089755a commit a4863b0
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 19 deletions.
2 changes: 1 addition & 1 deletion bedms/attr_standardizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def __init__(
Initializes the attribute standardizer with user provided schema, loads the model.
:param str schema: User provided schema, can be "ENCODE" or "FAIRTRACKS"
:param str custom_param: User provided config file for
:param str custom_param: User provided config file for
custom parameters, if they choose "CUSTOM" schema.
:param int confidence: Confidence threshold for the predictions.
"""
Expand Down
1 change: 1 addition & 0 deletions bedms/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
This module has all util functions for 'bedms'
"""

import logging
import warnings
from collections import Counter
Expand Down
37 changes: 19 additions & 18 deletions bedms/utils_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
message="Creating a tensor from a list of numpy.ndarrays is extremely slow.",
)


def load_from_dir(dir: str) -> List[str]:
"""
Loads each file from the directory path.
Expand Down Expand Up @@ -69,12 +70,12 @@ def accumulate_data(
"""
Accumulates data from multiple files into lists.
:param List[Tuple[str, str]] files: List containing
:param List[Tuple[str, str]] files: List containing
sublists of values or header files.
:return Tuple[List[List[List[str]]], List[List[List[str]]], List[pd.Index]]:
:return Tuple[List[List[List[str]]], List[List[List[str]]], List[pd.Index]]:
Lists of values, headers, labels.
A tuple containing three lists:
- A nested list of values (list of tables where
- A nested list of values (list of tables where
each table is a list of lists for columns),
- A nested list of headers (similar structure to values),
- A list of Pandas Index objects containing column labels.
Expand Down Expand Up @@ -125,7 +126,7 @@ def get_top_training_cluster_averaged(
:param List[torch.tensor] embeddings: List of embedding tensors to cluster.
:param int num: Number of clusters to be created using k-means.
:return torch.Tensor: A tensor representing the
:return torch.Tensor: A tensor representing the
average of embeddings in the most common cluster.
"""
flattened_embeddings = [embedding.tolist() for embedding in embeddings]
Expand Down Expand Up @@ -174,23 +175,23 @@ def training_encoding(
"""
Generates encoded headers and values.
:param List[List[List[str]]] x_values_train_list:
:param List[List[List[str]]] x_values_train_list:
Nested list containing the training set for values.
:param List[List[List[str]]] x_headers_train_list:
:param List[List[List[str]]] x_headers_train_list:
Nested list containing the training set for headers.
:param List[pd.Index] y_train_list:
:param List[pd.Index] y_train_list:
List of the column labels (attributes) for training.
:param List[List[List[str]]] x_values_test_list:
:param List[List[List[str]]] x_values_test_list:
Nested list containing the testing set for values.
:param List[List[List[str]]] x_headers_test_list:
:param List[List[List[str]]] x_headers_test_list:
Nested list containing the testing set for headers.
:param List[pd.Index] y_test_list:
:param List[pd.Index] y_test_list:
List of the column labels (attributes) for testing.
:param List[List[List[str]]] x_values_val_list:
:param List[List[List[str]]] x_values_val_list:
Nested list containing the validation set for values.
:param List[List[List[str]]] x_headers_val_list:
:param List[List[List[str]]] x_headers_val_list:
Nested list containing the validation set for headers.
:param List[pd.Index] y_val_list:
:param List[pd.Index] y_val_list:
List of the column labels (attributes) for validation.
:return Tuple[
Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
Expand Down Expand Up @@ -240,7 +241,7 @@ def encode_data(
num_cluster: int,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
"""
This nested function encodes the values, headers and labels data.
This nested function encodes the values, headers and labels data.
It is called thrice - for training, testing, and validation.
:param List[List[List[str]]] x_values_list: Nested list containing values.
Expand Down Expand Up @@ -339,11 +340,11 @@ def data_loader(
"""
Creates a DataLoader from encoded tensor data.
:param Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor] encoded_data:
:param Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor] encoded_data:
Tuple containing tensors for
values bag of words, values embeddings, headers embeddings, and labels.
:param int batch_size: The number of samples per batch for the DataLoader.
:return DataLoader: A PyTorch DataLoader which yields
:return DataLoader: A PyTorch DataLoader which yields
batches of data from the given tensors.
"""
(
Expand All @@ -365,11 +366,11 @@ def data_loader(

def drop_bow(bow_tensor: torch.Tensor, num_drops: int) -> torch.Tensor:
"""
Randomly drops a specified number of columns in the
Randomly drops a specified number of columns in the
Bag of Words tensor for regularization.
:param torch.Tensor bow_tensor: Bag of Words tensor.
:param int num_drops: Number of columns to be randomly
:param int num_drops: Number of columns to be randomly
dropped from the Bag of Words tensor.
:return torch.Tensor: Bag of Words tensor with dropped columns.
"""
Expand Down

0 comments on commit a4863b0

Please sign in to comment.