Skip to content

Commit

Permalink
Upgraded reindexing of bedbase db
Browse files Browse the repository at this point in the history
  • Loading branch information
khoroshevskyi committed Jun 6, 2024
1 parent c53c43d commit 60bfaaf
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 23 deletions.
12 changes: 6 additions & 6 deletions bbconf/bbagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,21 @@ def __init__(

self.config = BedBaseConfig(config)

self.__bed = BedAgentBedFile(self.config, self)
self.__bedset = BedAgentBedSet(self.config)
self.__objects = BBObjects(self.config)
self._bed = BedAgentBedFile(self.config, self)
self._bedset = BedAgentBedSet(self.config)
self._objects = BBObjects(self.config)

@property
def bed(self) -> BedAgentBedFile:
return self.__bed
return self._bed

@property
def bedset(self) -> BedAgentBedSet:
return self.__bedset
return self._bedset

@property
def objects(self) -> BBObjects:
return self.__objects
return self._objects

@cached_property
def get_stats(self) -> StatsReturn:
Expand Down
49 changes: 32 additions & 17 deletions bbconf/modules/bedfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
from typing import Dict, Union

import numpy as np
from tqdm import tqdm

from geniml.bbclient import BBClient
from geniml.io import RegionSet
from genimtools.tokenizers import RegionSet as GRegionSet
from pephubclient.exceptions import ResponseError
from qdrant_client.models import Distance, PointIdsList, VectorParams
from sqlalchemy import and_, delete, func, select
Expand Down Expand Up @@ -446,6 +449,7 @@ def add(

if upload_qdrant:
if classification.genome_alias == "hg38":
_LOGGER.info(f"Uploading bed file to qdrant.. [{identifier}]")
self.upload_file_qdrant(
identifier,
files.bed_file.path,
Expand Down Expand Up @@ -705,23 +709,26 @@ def upload_file_qdrant(
:param payload: additional metadata to store alongside vectors
:return: None
"""
if not self._qdrant_engine:
if self._qdrant_engine is None:
raise QdrantInstanceNotInitializedError

if not self._config.r2v:
raise BedBaseConfError(
"Could not add add region to qdrant. Invalid type, or path. "
)

_LOGGER.info(f"Adding bed file to qdrant. bed_id: {bed_id}")
_LOGGER.debug(f"Adding bed file to qdrant. bed_id: {bed_id}")
if isinstance(bed_file, str):
bed_region_set = RegionSet(bed_file)
elif isinstance(bed_file, RegionSet):
bed_region_set = GRegionSet(bed_file)
elif isinstance(bed_file, RegionSet) or isinstance(bed_file, GRegionSet):
bed_region_set = bed_file
else:
raise BedBaseConfError(
"Could not add add region to qdrant. Invalid type, or path. "
)
# NOTE: the per-region encoding below is not really working, so it is left commented out:
# bed_embedding = np.mean([self._config.r2v.encode(r) for r in bed_region_set], axis=0)

bed_embedding = np.mean(self._config.r2v.encode(bed_region_set), axis=0)

# Upload bed file vector to the database
Expand Down Expand Up @@ -814,20 +821,28 @@ def reindex_qdrant(self) -> None:

bed_ids = [bed_result[0] for bed_result in bed_ids]

for record_id in bed_ids:
bed_region_set_obj = bb_client.load_bed(record_id)
metadata = self._config.phc.sample.get(
namespace=self._config.config.phc.namespace,
name=self._config.config.phc.name,
tag=self._config.config.phc.tag,
sample_name=record_id,
)
with tqdm(total=len(bed_ids), position=0, leave=True) as pbar:
for record_id in bed_ids:
try:
bed_region_set_obj = GRegionSet(bb_client.seek(record_id))
except FileNotFoundError:
bed_region_set_obj = bb_client.load_bed(record_id)

pbar.set_description(f"Processing file: {record_id}")
metadata = self._config.phc.sample.get(
namespace=self._config.config.phc.namespace,
name=self._config.config.phc.name,
tag=self._config.config.phc.tag,
sample_name=record_id,
)

self.upload_file_qdrant(
bed_id=record_id,
bed_file=bed_region_set_obj,
payload=metadata,
)
self.upload_file_qdrant(
bed_id=record_id,
bed_file=bed_region_set_obj,
payload=BedPEPHubRestrict(**metadata).model_dump(),
)
pbar.write(f"File: {record_id} uploaded to qdrant successfully.")
pbar.update(1)

return None

Expand Down

0 comments on commit 60bfaaf

Please sign in to comment.