Skip to content

Commit

Permalink
updated qdrant payloads
Browse files Browse the repository at this point in the history
  • Loading branch information
khoroshevskyi committed Jun 4, 2024
1 parent 6d6ef25 commit f5213fb
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 7 deletions.
2 changes: 1 addition & 1 deletion bbconf/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
class SchemaError(Exception):
    """
    Error raised when the schema of the connected database is incorrect.

    The exception takes no arguments; it always carries the same fixed message.
    """

    def __init__(self):
        # Only the current message is passed on. Leaving the superseded
        # "PEP_db connection error!..." literal next to it would make Python
        # concatenate both strings into one garbled message.
        super().__init__(
            """The database schema is incorrect, can't connect to the database!"""
        )


Expand Down
1 change: 1 addition & 0 deletions bbconf/models/bed_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ class TokenizedBedResponse(BaseModel):

class BedEmbeddingResult(BaseModel):
    """
    Embedding record of a single bed file, as built by `get_embedding`
    from a record retrieved from the qdrant collection.
    """

    # Identifier the record was looked up by.
    identifier: str
    # Payload stored with the record in qdrant (`result[0].payload`).
    # NOTE(review): presumably the bed file metadata dump - confirm against the uploader.
    payload: dict
    # Vector stored with the record in qdrant (`result[0].vector`).
    embedding: List[float]


Expand Down
27 changes: 21 additions & 6 deletions bbconf/modules/bedfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,12 +307,15 @@ def get_embedding(self, identifier: str) -> BedEmbeddingResult:
collection_name=self._config.config.qdrant.collection,
ids=[identifier],
with_vectors=True,
with_payload=True,
)
if not result:
raise BEDFileNotFoundError(
f"Bed file with id: {identifier} not found in qdrant database."
)
return BedEmbeddingResult(identifier=identifier, embedding=result[0].vector)
return BedEmbeddingResult(
identifier=identifier, embedding=result[0].vector, payload=result[0].payload
)

def get_ids_list(
self,
Expand Down Expand Up @@ -425,7 +428,11 @@ def add(
if upload_pephub:
metadata = BedPEPHub(**metadata)
try:
self.upload_pephub(identifier, metadata.model_dump(), overwrite)
self.upload_pephub(
identifier,
metadata.model_dump(exclude=set("input_file")),
overwrite,
)
except Exception as e:
_LOGGER.warning(
f"Could not upload to pephub. Error: {e}. nofail: {nofail}"
Expand All @@ -438,7 +445,9 @@ def add(
if upload_qdrant:
if classification.genome_alias == "hg38":
self.upload_file_qdrant(
identifier, files.bed_file.path, {"bed_id": identifier}
identifier,
files.bed_file.path,
metadata.model_dump(exclude=set("input_file")),
)
_LOGGER.info(f"File uploaded to qdrant. {identifier}")
else:
Expand Down Expand Up @@ -519,7 +528,7 @@ def update(
"""
Update bed file to the database.
!! WARNING: this method is in development. Try not to use it !!
!! WARNING: this method is in development. Please avoid using it !!
:param identifier: bed file identifier
:param stats: bed file results {statistics, plots, files, metadata}
Expand Down Expand Up @@ -560,7 +569,7 @@ def update(

if add_to_qdrant:
self.upload_file_qdrant(
identifier, files.bed_file.path, {"bed_id": identifier}
identifier, files.bed_file.path, payload=metadata.model_dump()
)

statement = select(Bed).where(Bed.id == identifier)
Expand Down Expand Up @@ -798,11 +807,17 @@ def reindex_qdrant(self) -> None:

for record_id in bed_ids:
bed_region_set_obj = bb_client.load_bed(record_id)
metadata = self._config.phc.sample.get(
namespace=self._config.config.phc.namespace,
name=self._config.config.phc.name,
tag=self._config.config.phc.tag,
sample_name=record_id,
)

self.upload_file_qdrant(
bed_id=record_id,
bed_file=bed_region_set_obj,
payload={"bed_id": record_id},
payload=metadata,
)

return None
Expand Down

0 comments on commit f5213fb

Please sign in to comment.