From 7f484a665022b8da84e15a068423980d2c098879 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 13 Sep 2024 13:42:40 -0400 Subject: [PATCH] restructured code --- bedms/attr_standardizer.py | 8 +++++--- bedms/const.py | 2 ++ bedms/utils.py | 2 +- requirements/requirements-all.txt | 2 +- tests/test_bedms.py | 7 ------- trial.py | 12 ------------ 6 files changed, 9 insertions(+), 24 deletions(-) delete mode 100644 trial.py diff --git a/bedms/attr_standardizer.py b/bedms/attr_standardizer.py index 13bf949..fbcaf39 100644 --- a/bedms/attr_standardizer.py +++ b/bedms/attr_standardizer.py @@ -7,6 +7,7 @@ import torch.nn.functional as torch_functional from .const import ( + AVAILABLE_SCHEMAS, CONFIDENCE_THRESHOLD, DROPOUT_PROB, EMBEDDING_SIZE, @@ -17,8 +18,8 @@ OUTPUT_SIZE_BEDBASE, OUTPUT_SIZE_ENCODE, OUTPUT_SIZE_FAIRTRACKS, - SENTENCE_TRANSFORMER_MODEL, PROJECT_NAME, + SENTENCE_TRANSFORMER_MODEL, ) from .model import BoWSTModel from .utils import ( @@ -200,7 +201,8 @@ def standardize( def get_available_schemas() -> list[str]: """ Stores a list of available schemas. + :return list: List of available schemas. """ - schemas = ["ENCODE", "FAIRTRACKS", "BEDBASE"] - return schemas + + return AVAILABLE_SCHEMAS diff --git a/bedms/const.py b/bedms/const.py index 54e9b06..e325671 100644 --- a/bedms/const.py +++ b/bedms/const.py @@ -1,5 +1,7 @@ PROJECT_NAME = "bedmess" +AVAILABLE_SCHEMAS = ["ENCODE", "FAIRTRACKS", "BEDBASE"] + REPO_ID = "databio/attribute-standardizer-model6" MODEL_ENCODE = "model_encode.pth" MODEL_FAIRTRACKS = "model_fairtracks.pth" diff --git a/bedms/utils.py b/bedms/utils.py index aff492e..67dbd2e 100644 --- a/bedms/utils.py +++ b/bedms/utils.py @@ -8,7 +8,7 @@ import peppy import torch from huggingface_hub import hf_hub_download -from pephubclient import PEPHubClient + from sentence_transformers import SentenceTransformer from sklearn.cluster import KMeans from sklearn.feature_extraction.text import CountVectorizer diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 848e7e8..3f373a4 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -3,4 +3,4 @@ numpy torch sentence-transformers pephubclient>=0.4.2 -peppy>=0.40.5 +peppy>=0.40.6 diff --git a/tests/test_bedms.py b/tests/test_bedms.py index 74382ac..a47dfb1 100755 --- a/tests/test_bedms.py +++ b/tests/test_bedms.py @@ -1,16 +1,9 @@ -import pytest from bedms import AttrStandardizer class TestBEDMES: def test_bedmes(self): - model = AttrStandardizer("ENCODE") - - schemas = model.get_available_schemas() - - assert schemas - # results = model.standardize(pep="geo/gse178283:default") results = model.standardize(pep="geo/gse228634:default") assert results diff --git a/trial.py b/trial.py deleted file mode 100644 index 9c2fec8..0000000 --- a/trial.py +++ /dev/null @@ -1,12 +0,0 @@ -from bedms import AttrStandardizer - -model = AttrStandardizer("ENCODE") - -schemas = model.get_available_schemas() - -print(schemas) - -# results = model.standardize(pep="geo/gse178283:default") -results = model.standardize(pep="geo/gse228634:default") - -print(results)