Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 0.10.0 #71

Open
wants to merge 30 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
d21e497
Added trackHub file creation to bedset
khoroshevskyi Nov 11, 2024
60a7cff
Added trackHub file creation to bedset
khoroshevskyi Nov 12, 2024
0e5a36e
Added bed sql search
khoroshevskyi Nov 13, 2024
0e75c72
Set limit on bedset trackhub response
khoroshevskyi Nov 13, 2024
9f231b7
improved error handling in bedset
khoroshevskyi Nov 21, 2024
8adf567
bump pephubclient version
khoroshevskyi Nov 26, 2024
a4dd469
add some logging and docs improvements
nsheff Nov 27, 2024
5ba0c56
some ideas toward #65
nsheff Nov 27, 2024
8940fb5
Fixed #64
khoroshevskyi Dec 2, 2024
3ad2523
Fixed #66
khoroshevskyi Dec 3, 2024
3c146f5
updated requirements
khoroshevskyi Dec 3, 2024
4ca2adf
Merge remote-tracking branch 'origin/dev' into dev
khoroshevskyi Dec 3, 2024
590ee70
lint
khoroshevskyi Dec 3, 2024
530c891
Merge branch 'refs/heads/dev' into partial_processing
khoroshevskyi Dec 9, 2024
af026e1
updated for partial_processing
khoroshevskyi Dec 9, 2024
e464003
updated tests and fixed uploading files
khoroshevskyi Dec 11, 2024
d4bf110
updated uv in tests
khoroshevskyi Dec 11, 2024
fa91690
updated installation
khoroshevskyi Dec 11, 2024
3899882
updated pip
khoroshevskyi Dec 11, 2024
a8d48f7
lower version of pybiocfilecache
khoroshevskyi Dec 11, 2024
bcb20f8
SERVICE_UNAVAILABLE TRUE
khoroshevskyi Dec 11, 2024
7511e01
Merge branch 'refs/heads/dev' into partial_processing
khoroshevskyi Dec 15, 2024
f9a55b2
- added tests
khoroshevskyi Dec 15, 2024
1018496
- added processed table to bedsets
khoroshevskyi Dec 16, 2024
8134d8f
Fixed qdrant init error
khoroshevskyi Dec 17, 2024
c3b4bcf
Added get method for reference validation results
khoroshevskyi Dec 18, 2024
4c9891b
Fixed Donald comments for PR
khoroshevskyi Dec 19, 2024
2d7403b
Merge pull request #72 from databio/partial_processing
khoroshevskyi Dec 19, 2024
a6d5119
improved reindexing
khoroshevskyi Dec 20, 2024
5b71c7a
improved reindexing batching
khoroshevskyi Dec 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions .github/workflows/run-pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
pytest:
strategy:
matrix:
python-version: ["3.9", "3.11"]
python-version: ["3.9", "3.12"]
os: [ubuntu-latest] # can't use macOS when using service containers or container jobs
runs-on: ${{ matrix.os }}
services:
Expand All @@ -27,21 +27,23 @@ jobs:
- 5432:5432
options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
steps:
- uses: actions/checkout@v2

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install uv
run: pip install uv

- name: Install dev dependencies
run: if [ -f requirements/requirements-dev.txt ]; then pip install -r requirements/requirements-dev.txt; fi
run: if [ -f requirements/requirements-dev.txt ]; then uv pip install -r requirements/requirements-dev.txt --system; fi

- name: Install test dependencies
run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi
run: if [ -f requirements/requirements-test.txt ]; then uv pip install -r requirements/requirements-test.txt --system; fi

- name: Install package
run: python -m pip install .
run: uv pip install . --system

- name: Run pytest tests
run: pytest tests -x -vv
2 changes: 1 addition & 1 deletion bbconf/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.9.0"
__version__ = "0.10.0"
20 changes: 18 additions & 2 deletions bbconf/bbagent.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import logging

from functools import cached_property
from pathlib import Path
from typing import List, Union
Expand All @@ -12,19 +14,26 @@
from bbconf.modules.bedsets import BedAgentBedSet
from bbconf.modules.objects import BBObjects

from .const import PKG_NAME

_LOGGER = logging.getLogger(PKG_NAME)


class BedBaseAgent(object):
def __init__(
self,
config: Union[Path, str],
init_ml: bool = True,
):
"""
Initialize connection to the pep_db database. You can use The basic connection parameters
Initialize connection to the pep_db database. You can use the basic connection parameters
or libpq connection string.

:param config: path to the configuration file
:param init_ml: initialize ML models for search (default: True)
"""

self.config = BedBaseConfig(config)
self.config = BedBaseConfig(config, init_ml)

self._bed = BedAgentBedFile(self.config, self)
self._bedset = BedAgentBedSet(self.config)
Expand All @@ -42,6 +51,13 @@ def bedset(self) -> BedAgentBedSet:
def objects(self) -> BBObjects:
return self._objects

def __repr__(self) -> str:
    """
    Build a multi-line, human-readable description of the agent.

    The first line shows the configuration object, followed by the
    ``bed``, ``bedset`` and ``objects`` attributes, one per line.

    :return: newline-separated string representation of the agent
    """
    # The local is deliberately not called ``repr``: that name would shadow
    # the builtin for the rest of the method body.
    lines = [
        f"BedBaseAgent(config={self.config})",
        f"{self.bed}",
        f"{self.bedset}",
        f"{self.objects}",
    ]
    return "\n".join(lines)

def get_stats(self) -> StatsReturn:
"""
Get statistics for a bed file
Expand Down
67 changes: 54 additions & 13 deletions bbconf/config_parser/bedbaseconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,38 @@
_LOGGER = logging.getLogger(PKG_NAME)


class BedBaseConfig:
def __init__(self, config: Union[Path, str]):
class BedBaseConfig(object):
"""
Class to handle BEDbase configuration file and create objects for different modules.
"""

def __init__(self, config: Union[Path, str], init_ml: bool = True):
"""
Initialize BedBaseConfig object

:param config: path to the configuration file
:param init_ml: initialize machine learning models used for search
"""

self.cfg_path = get_bedbase_cfg(config)
self._config = self._read_config_file(self.cfg_path)

self._db_engine = self._init_db_engine()

self._qdrant_engine = self._init_qdrant_backend()
self._qdrant_text_engine = self._init_qdrant_text_backend()
self._b2bsi = self._init_b2bsi_object()
self._r2v = self._init_r2v_object()
self._bivec = self._init_bivec_object()

if init_ml:
self._b2bsi = self._init_b2bsi_object()
self._r2v = self._init_r2v_object()
self._bivec = self._init_bivec_object()
else:
_LOGGER.info(
f"Skipping initialization of ML models, init_ml parameter set to False."
)

self._b2bsi = None
self._r2v = None
self._bivec = None

self._phc = self._init_pephubclient()
self._boto3_client = self._init_boto3_client()
Expand Down Expand Up @@ -182,6 +203,11 @@ def zarr_root(self) -> Union[Z_GROUP, None]:
return zarr.group(store=cache, overwrite=False)

def _init_db_engine(self) -> BaseEngine:
"""
Create database engine object using credentials provided in config file
"""

_LOGGER.info(f"Initializing database engine...")
return BaseEngine(
host=self._config.database.host,
port=self._config.database.port,
Expand All @@ -197,6 +223,8 @@ def _init_qdrant_backend(self) -> QdrantBackend:

:return: QdrantClient
"""

_LOGGER.info(f"Initializing qdrant engine...")
try:
return QdrantBackend(
collection=self._config.qdrant.file_collection,
Expand All @@ -210,19 +238,27 @@ def _init_qdrant_backend(self) -> QdrantBackend:
f"error in Connection to qdrant! skipping... Error: {err}", UserWarning
)

def _init_qdrant_text_backend(self) -> QdrantBackend:
def _init_qdrant_text_backend(self) -> Union[QdrantBackend, None]:
"""
Create qdrant client text embedding object using credentials provided in config file

:return: QdrantClient
"""

return QdrantBackend(
dim=TEXT_EMBEDDING_DIMENSION,
collection=self.config.qdrant.text_collection,
qdrant_host=self.config.qdrant.host,
qdrant_api_key=self.config.qdrant.api_key,
)
_LOGGER.info(f"Initializing qdrant text engine...")
try:
return QdrantBackend(
dim=TEXT_EMBEDDING_DIMENSION,
collection=self.config.qdrant.text_collection,
qdrant_host=self.config.qdrant.host,
qdrant_api_key=self.config.qdrant.api_key,
)
except Exception as e:
_LOGGER.error(f"Error in Connection to qdrant text! skipping {e}")
warnings.warn(
"Error in Connection to qdrant text! skipping...", UserWarning
)
return None

def _init_bivec_object(self) -> Union[BiVectorSearchInterface, None]:
"""
Expand All @@ -231,9 +267,11 @@ def _init_bivec_object(self) -> Union[BiVectorSearchInterface, None]:
:return: BiVectorSearchInterface
"""

_LOGGER.info(f"Initializing BiVectorBackend...")
search_backend = BiVectorBackend(
metadata_backend=self._qdrant_text_engine, bed_backend=self._qdrant_engine
)
_LOGGER.info(f"Initializing BiVectorSearchInterface...")
search_interface = BiVectorSearchInterface(
backend=search_backend,
query2vec=self.config.path.text2vec,
Expand All @@ -247,6 +285,7 @@ def _init_b2bsi_object(self) -> Union[BED2BEDSearchInterface, None]:
:return: Bed2BEDSearchInterface object
"""
try:
_LOGGER.info(f"Initializing search interfaces...")
return BED2BEDSearchInterface(
backend=self.qdrant_engine,
query2vec=BED2Vec(model=self._config.path.region2vec),
Expand All @@ -267,6 +306,7 @@ def _init_pephubclient() -> Union[PEPHubClient, None]:
:return: PephubClient
"""
try:
_LOGGER.info(f"Initializing PEPHub client...")
return PEPHubClient()
except Exception as e:
_LOGGER.error(f"Error in creating PephubClient object: {e}")
Expand Down Expand Up @@ -298,6 +338,7 @@ def _init_r2v_object(self) -> Union[Region2VecExModel, None]:
Create Region2VecExModel object using credentials provided in config file
"""
try:
_LOGGER.info(f"Initializing R2V object...")
return Region2VecExModel(self.config.path.region2vec)
except Exception as e:
_LOGGER.error(f"Error in creating Region2VecExModel object: {e}")
Expand Down
16 changes: 14 additions & 2 deletions bbconf/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ class Bed(Base):
ref_classifier: Mapped["GenomeRefStats"] = relationship(
"GenomeRefStats", back_populates="bed", cascade="all, delete-orphan"
)
processed: Mapped[bool] = mapped_column(
default=False, comment="Whether the bed file was processed"
)


class BedMetadata(Base):
Expand Down Expand Up @@ -255,6 +258,11 @@ class Files(Base):
bedfile: Mapped["Bed"] = relationship("Bed", back_populates="files")
bedset: Mapped["BedSets"] = relationship("BedSets", back_populates="files")

__table_args__ = (
UniqueConstraint("name", "bedfile_id"),
UniqueConstraint("name", "bedset_id"),
)


class BedFileBedSetRelation(Base):
__tablename__ = "bedfile_bedset_relation"
Expand Down Expand Up @@ -303,6 +311,10 @@ class BedSets(Base):
author: Mapped[str] = mapped_column(nullable=True, comment="Author of the bedset")
source: Mapped[str] = mapped_column(nullable=True, comment="Source of the bedset")

processed: Mapped[bool] = mapped_column(
default=False, comment="Whether the bedset was processed"
)


class Universes(Base):
__tablename__ = "universes"
Expand Down Expand Up @@ -339,7 +351,7 @@ class TokenizedBed(Base):
nullable=False,
)
universe_id: Mapped[str] = mapped_column(
ForeignKey("universes.id", ondelete="CASCADE", passive_deletes=True),
ForeignKey("universes.id", ondelete="CASCADE"),
primary_key=True,
index=True,
nullable=False,
Expand All @@ -352,7 +364,7 @@ class TokenizedBed(Base):
universe: Mapped["Universes"] = relationship(
"Universes",
back_populates="tokenized",
passive_deletes=True,
passive_deletes="all",
)


Expand Down
6 changes: 6 additions & 0 deletions bbconf/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,9 @@ class QdrantInstanceNotInitializedError(BedBaseConfError):
"""Error type for missing qdrant instance"""

pass


class BedSetTrackHubLimitError(BedBaseConfError):
    """Error type for exceeding the limit for visualizing a trackhub"""
2 changes: 2 additions & 0 deletions bbconf/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ def get_bedbase_cfg(cfg: str = None) -> str:
Optional, the $BEDBASE config env var will be used if not provided
:return str: absolute configuration file path
"""

_LOGGER.info(f"Loading configuration file: {cfg}")
selected_cfg = select_config(config_filepath=cfg, config_env_vars=CFG_ENV_VARS)
if not selected_cfg:
raise BedBaseConnectionError(
Expand Down
12 changes: 9 additions & 3 deletions bbconf/models/bed_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class BedStatsModel(BaseModel):


class BedPEPHub(BaseModel):
sample_name: str
sample_name: str = ""
genome: str = ""
organism: str = ""
species_id: str = ""
Expand Down Expand Up @@ -192,8 +192,8 @@ class BedListResult(BaseModel):

class QdrantSearchResult(BaseModel):
    """
    Model of a single search hit.

    NOTE(review): presumably one result of a qdrant query, judging by the
    class name -- confirm against the callers that build it.
    """

    # Identifier of the hit; the only required field.
    id: str
    # Optional fields: the annotations admit None so they agree with the
    # None defaults (a bare ``dict = None`` / ``float = None`` rejects an
    # explicitly passed None and is flagged by type checkers).
    payload: Union[dict, None] = None
    score: Union[float, None] = None
    metadata: Union[BedMetadataBasic, None] = None


Expand Down Expand Up @@ -233,3 +233,9 @@ class RefGenValidModel(BaseModel):
tier_ranking: int

model_config = ConfigDict(extra="forbid")


class RefGenValidReturnModel(BaseModel):
    """
    Model grouping a list of ``RefGenValidModel`` entries under one id.

    NOTE(review): presumably the return shape of the reference-genome
    validation lookup -- confirm against the code that constructs it.
    """

    # Identifier the results belong to (required).
    id: str
    # Optional; defaults to None when not supplied.
    provided_genome: Union[str, None] = None
    # Required list of RefGenValidModel entries.
    compared_genome: List[RefGenValidModel]
Loading
Loading