Skip to content

Commit

Permalink
- added tests
Browse files Browse the repository at this point in the history
- bug fixes and robustness
- added get_unprocessed
  • Loading branch information
khoroshevskyi committed Dec 15, 2024
1 parent 7511e01 commit f9a55b2
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 49 deletions.
21 changes: 14 additions & 7 deletions bbconf/config_parser/bedbaseconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,20 +238,27 @@ def _init_qdrant_backend(self) -> QdrantBackend:
f"error in Connection to qdrant! skipping... Error: {err}", UserWarning
)

def _init_qdrant_text_backend(self) -> Union[QdrantBackend, None]:
    """
    Create qdrant client text embedding object using credentials provided in config file

    Initialization is best-effort: if the backend cannot be created, the
    error is logged, a UserWarning is emitted, and None is returned
    instead of raising.

    :return: QdrantBackend for the text collection, or None if initialization failed
    """

    _LOGGER.info("Initializing qdrant text engine...")
    try:
        return QdrantBackend(
            dim=TEXT_EMBEDDING_DIMENSION,
            collection=self.config.qdrant.text_collection,
            qdrant_host=self.config.qdrant.host,
            qdrant_api_key=self.config.qdrant.api_key,
        )
    except Exception as err:
        # Deliberately broad: any failure here should degrade to "no text
        # backend" rather than abort. Keep the cause in the message so the
        # failure can be diagnosed.
        message = f"Error in Connection to qdrant text! skipping... Error: {err}"
        _LOGGER.error(message)
        warnings.warn(message, UserWarning)
        return None

def _init_bivec_object(self) -> Union[BiVectorSearchInterface, None]:
"""
Expand Down
2 changes: 1 addition & 1 deletion bbconf/models/bed_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class BedStatsModel(BaseModel):


class BedPEPHub(BaseModel):
sample_name: str
sample_name: str = ""
genome: str = ""
organism: str = ""
species_id: str = ""
Expand Down
100 changes: 79 additions & 21 deletions bbconf/modules/bedfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,12 +614,12 @@ def add(
def update(
self,
identifier: str,
stats: dict,
metadata: dict = None,
plots: dict = None,
files: dict = None,
classification: dict = None,
ref_validation: Dict[str, BaseModel] = None,
stats: Union[dict, None] = None,
metadata: Union[dict, None] = None,
plots: Union[dict, None] = None,
files: Union[dict, None] = None,
classification: Union[dict, None] = None,
ref_validation: Union[Dict[str, BaseModel], None] = None,
license_id: str = DEFAULT_LICENSE,
upload_qdrant: bool = True,
upload_pephub: bool = True,
Expand Down Expand Up @@ -663,11 +663,11 @@ def update(
f"List of licenses: {self.bb_agent.list_of_licenses}"
)

stats = BedStatsModel(**stats)
plots = BedPlots(**plots)
files = BedFiles(**files)
bed_metadata = StandardMeta(**metadata)
classification = BedClassification(**classification)
stats = BedStatsModel(**stats if stats else {})
plots = BedPlots(**plots if plots else {})
files = BedFiles(**files if files else {})
bed_metadata = StandardMeta(**metadata if metadata else {})
classification = BedClassification(**classification if classification else {})

if upload_pephub:
metadata = BedPEPHub(**metadata)
Expand Down Expand Up @@ -978,16 +978,19 @@ def upload_pephub(self, identifier: str, metadata: dict, overwrite: bool = False
)

def update_pephub(
    self, identifier: str, metadata: dict, overwrite: bool = False
) -> bool:
    """
    Update the metadata of an existing sample in pephub.

    The update is best-effort: a ResponseError from pephub is logged as a
    warning and not re-raised.

    :param identifier: bed file identifier, used as the pephub sample name
    :param metadata: sample metadata to send; if empty, the update is skipped
    :param overwrite: accepted for signature compatibility; not used by this method
    :return: True if the update request was sent without error, False if it
        was skipped (no metadata) or pephub returned an error
    """
    # Guard before the try block so that only the remote call is covered by
    # the exception handler.
    if not metadata:
        _LOGGER.warning("No metadata provided. Skipping pephub upload..")
        return False

    try:
        self._config.phc.sample.update(
            namespace=self._config.config.phc.namespace,
            name=self._config.config.phc.name,
            tag=self._config.config.phc.tag,
            sample_name=identifier,
            sample_dict=metadata,
        )
    except ResponseError as e:
        _LOGGER.warning(f"Could not update pephub. Error: {e}")
        return False
    return True

def delete_pephub_sample(self, identifier: str):
"""
Expand Down Expand Up @@ -1023,6 +1026,10 @@ def upload_file_qdrant(
"""

_LOGGER.debug(f"Adding bed file to qdrant. bed_id: {bed_id}")

if not self._qdrant_engine:
raise QdrantInstanceNotInitializedError("Could not upload file.")

bed_embedding = self._embed_file(bed_file)

self._qdrant_engine.load(
Expand Down Expand Up @@ -1559,3 +1566,54 @@ def get_missing_plots(
results = [result for result in results]

return results

def get_unprocessed(self, limit: int = 1000, offset: int = 0) -> BedListResult:
    """
    Get bed files that are not processed.

    :param limit: number of results to return
    :param offset: offset to start from
    :return: BedListResult holding the total number of unprocessed bed files
        and the requested page of basic metadata records
    """
    with Session(self._sa_engine) as session:
        query = (
            select(Bed)
            .where(Bed.processed.is_(False))
            # LIMIT/OFFSET without ORDER BY gives no guarantee about which
            # rows land on which page; order by id so pages are stable.
            .order_by(Bed.id)
            .limit(limit)
            .offset(offset)
        )
        count_query = select(func.count()).where(Bed.processed.is_(False))

        # Total number of unprocessed records, independent of limit/offset.
        count = session.execute(count_query).one()[0]

        # Build the result models while the session is still open, because
        # `annotations` is read from each ORM object.
        results = [
            BedMetadataBasic(
                id=bed_object.id,
                name=bed_object.name,
                genome_alias=bed_object.genome_alias,
                genome_digest=bed_object.genome_digest,
                bed_type=bed_object.bed_type,
                bed_format=bed_object.bed_format,
                description=bed_object.description,
                annotation=StandardMeta(
                    **(
                        bed_object.annotations.__dict__
                        if bed_object.annotations
                        else {}
                    )
                ),
                last_update_date=bed_object.last_update_date,
                submission_date=bed_object.submission_date,
                is_universe=bed_object.is_universe,
                license_id=bed_object.license_id,
            )
            for bed_object in session.scalars(query)
        ]

    return BedListResult(
        count=count,
        limit=limit,
        offset=offset,
        results=results,
    )
21 changes: 8 additions & 13 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,6 @@
-p 5432:5432 postgres
"""

# try:
# subprocess.check_output(
# "docker inspect bedbase-test --format '{{.State.Status}}'", shell=True
# )
# SERVICE_UNAVAILABLE = False
# except:
# register(
# print, f"Some tests require a test database. To initiate it, run:\n{DB_CMD}"
# )
# SERVICE_UNAVAILABLE = True
SERVICE_UNAVAILABLE = False


TESTS_DIR = os.path.dirname(os.path.abspath(__file__))

CONFIG_PATH = os.path.join(
Expand All @@ -40,6 +27,14 @@
"data",
)

# try:
# BedBaseAgent(config=CONFIG_PATH)
# SERVICE_UNAVAILABLE = False
# except Exception as _:
# SERVICE_UNAVAILABLE = True
SERVICE_UNAVAILABLE = False


if not SERVICE_UNAVAILABLE:
agent = BedBaseAgent(config=CONFIG_PATH)

Expand Down
45 changes: 38 additions & 7 deletions tests/test_bedfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,13 +204,44 @@ def test_bed_delete_not_found(self, bbagent_obj):
with pytest.raises(BEDFileNotFoundError):
bbagent_obj.bed.delete("not_found")

@pytest.mark.skip("Skipped, not fully implemented")
def test_bed_update(self):
# agent = BedBaseAgent(config=config)
# ff = agent.bed.update("91b2754c8ff01769bacfc80e6923c46e", {"number_of_regions": 44})
# print(ff)
# assert ff != None
pass
def test_bed_update(self, bbagent_obj):
    """Check that metadata passed to bed.update is returned by the next full fetch."""
    # TODO: has to be expanded
    with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True):
        # The test record starts with an empty cell_line annotation.
        before = bbagent_obj.bed.get(BED_TEST_ID, full=True)
        assert before.annotation.cell_line == ""

        bbagent_obj.bed.update(
            identifier=BED_TEST_ID,
            metadata={"cell_line": "K562", "tissue": "blood"},
            upload_qdrant=False,
            upload_s3=False,
        )

        after = bbagent_obj.bed.get(BED_TEST_ID, full=True)
        assert after.annotation.cell_line == "K562"

def test_get_unprocessed(self, bbagent_obj):
    """Check that get_unprocessed reports the single test record as unprocessed."""
    with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True):
        unprocessed = bbagent_obj.bed.get_unprocessed(limit=100, offset=0)

        assert unprocessed.count == 1
        first_record = unprocessed.results[0]
        assert first_record.id == BED_TEST_ID

def test_get_missing_plots(self, bbagent_obj):
    """Check that the test record is listed as missing its "tss_distance" plot."""
    with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True):
        missing = bbagent_obj.bed.get_missing_plots(
            "tss_distance",
            limit=100,
            offset=0,
        )

        first_missing = missing[0]
        assert first_missing == BED_TEST_ID


@pytest.mark.skip("Skipped, because ML models and qdrant needed")
Expand Down
2 changes: 2 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def get_example_dict() -> dict:
"genome_alias": "hg38",
"genome_digest": "2230c535660fb4774114bfa966a62f823fdb6d21acf138d4",
"name": "random_name",
"processed": False,
}
return value

Expand Down Expand Up @@ -109,6 +110,7 @@ def __enter__(self):
self._add_bedset_data()

def __exit__(self, exc_type, exc_value, exc_traceback):
    """Drop the test schema when leaving the context."""
    # To keep the data and schema after a test run (e.g. for debugging),
    # comment out the following line.
    self.db_engine.delete_schema()

Expand Down

0 comments on commit f9a55b2

Please sign in to comment.