diff --git a/bbconf/config_parser/bedbaseconfig.py b/bbconf/config_parser/bedbaseconfig.py
index 0a79758..64b2200 100644
--- a/bbconf/config_parser/bedbaseconfig.py
+++ b/bbconf/config_parser/bedbaseconfig.py
@@ -238,7 +238,7 @@ def _init_qdrant_backend(self) -> QdrantBackend:
                 f"error in Connection to qdrant! skipping... Error: {err}", UserWarning
             )
 
-    def _init_qdrant_text_backend(self) -> QdrantBackend:
+    def _init_qdrant_text_backend(self) -> Union[QdrantBackend, None]:
         """
         Create qdrant client text embedding object using credentials provided in config file
 
@@ -246,12 +246,19 @@ def _init_qdrant_text_backend(self) -> QdrantBackend:
         """
 
         _LOGGER.info(f"Initializing qdrant text engine...")
-        return QdrantBackend(
-            dim=TEXT_EMBEDDING_DIMENSION,
-            collection=self.config.qdrant.text_collection,
-            qdrant_host=self.config.qdrant.host,
-            qdrant_api_key=self.config.qdrant.api_key,
-        )
+        try:
+            return QdrantBackend(
+                dim=TEXT_EMBEDDING_DIMENSION,
+                collection=self.config.qdrant.text_collection,
+                qdrant_host=self.config.qdrant.host,
+                qdrant_api_key=self.config.qdrant.api_key,
+            )
+        except Exception as err:
+            # Report the reason (like _init_qdrant_backend does) and continue without it.
+            message = f"Error in Connection to qdrant text! skipping... Error: {err}"
+            _LOGGER.error(message)
+            warnings.warn(message, UserWarning)
+            return None
 
     def _init_bivec_object(self) -> Union[BiVectorSearchInterface, None]:
         """
diff --git a/bbconf/models/bed_models.py b/bbconf/models/bed_models.py
index 4ba7118..2545d5d 100644
--- a/bbconf/models/bed_models.py
+++ b/bbconf/models/bed_models.py
@@ -75,7 +75,7 @@ class BedStatsModel(BaseModel):
 
 
 class BedPEPHub(BaseModel):
-    sample_name: str
+    sample_name: str = ""
     genome: str = ""
     organism: str = ""
     species_id: str = ""
diff --git a/bbconf/modules/bedfiles.py b/bbconf/modules/bedfiles.py
index dbe6dbb..d1dd97b 100644
--- a/bbconf/modules/bedfiles.py
+++ b/bbconf/modules/bedfiles.py
@@ -614,12 +614,12 @@ def add(
     def update(
         self,
         identifier: str,
-        stats: dict,
-        metadata: dict = None,
-        plots: dict = None,
-        files: dict = None,
-        classification: dict = None,
-        ref_validation: Dict[str, BaseModel] = None,
+        stats: Union[dict, None] = None,
+        metadata: Union[dict, None] = None,
+        plots: Union[dict, None] = None,
+        files: Union[dict, None] = None,
+        classification: Union[dict, None] = None,
+        ref_validation: Union[Dict[str, BaseModel], None] = None,
         license_id: str = DEFAULT_LICENSE,
         upload_qdrant: bool = True,
         upload_pephub: bool = True,
@@ -663,11 +663,11 @@ def update(
                 f"List of licenses: {self.bb_agent.list_of_licenses}"
             )
 
-        stats = BedStatsModel(**stats)
-        plots = BedPlots(**plots)
-        files = BedFiles(**files)
-        bed_metadata = StandardMeta(**metadata)
-        classification = BedClassification(**classification)
+        stats = BedStatsModel(**stats if stats else {})
+        plots = BedPlots(**plots if plots else {})
+        files = BedFiles(**files if files else {})
+        bed_metadata = StandardMeta(**metadata if metadata else {})
+        classification = BedClassification(**classification if classification else {})
 
         if upload_pephub:
             metadata = BedPEPHub(**metadata)
@@ -978,16 +978,19 @@ def upload_pephub(self, identifier: str, metadata: dict, overwrite: bool = False
         )
 
     def update_pephub(self, identifier: str, metadata: dict, overwrite: bool = False):
-        if not metadata:
-            _LOGGER.warning("No metadata provided. Skipping pephub upload..")
-            return False
-        self._config.phc.sample.update(
-            namespace=self._config.config.phc.namespace,
-            name=self._config.config.phc.name,
-            tag=self._config.config.phc.tag,
-            sample_name=identifier,
-            sample_dict=metadata,
-        )
+        try:
+            if not metadata:
+                _LOGGER.warning("No metadata provided. Skipping pephub upload..")
+                return False
+            self._config.phc.sample.update(
+                namespace=self._config.config.phc.namespace,
+                name=self._config.config.phc.name,
+                tag=self._config.config.phc.tag,
+                sample_name=identifier,
+                sample_dict=metadata,
+            )
+        except ResponseError as e:
+            _LOGGER.warning(f"Could not update pephub. Error: {e}")
 
     def delete_pephub_sample(self, identifier: str):
         """
@@ -1023,6 +1026,10 @@ def upload_file_qdrant(
         """
 
         _LOGGER.debug(f"Adding bed file to qdrant. bed_id: {bed_id}")
+
+        if not self._qdrant_engine:
+            raise QdrantInstanceNotInitializedError("Could not upload file.")
+
         bed_embedding = self._embed_file(bed_file)
 
         self._qdrant_engine.load(
@@ -1559,3 +1566,61 @@ def get_missing_plots(
             results = [result for result in results]
 
         return results
+
+    def get_unprocessed(self, limit: int = 1000, offset: int = 0) -> BedListResult:
+        """
+        Get bed files that are not processed.
+
+        :param limit: number of results to return
+        :param offset: offset to start from
+
+        :return: BedListResult with the total number of unprocessed bed files
+            and basic metadata of the requested page
+        """
+        with Session(self._sa_engine) as session:
+            # LIMIT/OFFSET without ORDER BY has no guaranteed row order, so
+            # pages could overlap or skip records; order by id to keep them stable.
+            query = (
+                select(Bed)
+                .where(Bed.processed.is_(False))
+                .order_by(Bed.id)
+                .limit(limit)
+                .offset(offset)
+            )
+            count_query = select(func.count()).where(Bed.processed.is_(False))
+
+            count = session.execute(count_query).one()[0]
+
+            bed_results = session.scalars(query)
+
+            results = []
+            for bed_object in bed_results:
+                results.append(
+                    BedMetadataBasic(
+                        id=bed_object.id,
+                        name=bed_object.name,
+                        genome_alias=bed_object.genome_alias,
+                        genome_digest=bed_object.genome_digest,
+                        bed_type=bed_object.bed_type,
+                        bed_format=bed_object.bed_format,
+                        description=bed_object.description,
+                        annotation=StandardMeta(
+                            **(
+                                bed_object.annotations.__dict__
+                                if bed_object.annotations
+                                else {}
+                            )
+                        ),
+                        last_update_date=bed_object.last_update_date,
+                        submission_date=bed_object.submission_date,
+                        is_universe=bed_object.is_universe,
+                        license_id=bed_object.license_id,
+                    )
+                )
+
+        return BedListResult(
+            count=count,
+            limit=limit,
+            offset=offset,
+            results=results,
+        )
diff --git a/tests/conftest.py b/tests/conftest.py
index 773aaa6..2f269aa 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -16,19 +16,6 @@
     -p 5432:5432 postgres
 """
 
-# try:
-#     subprocess.check_output(
-#         "docker inspect bedbase-test --format '{{.State.Status}}'", shell=True
-#     )
-#     SERVICE_UNAVAILABLE = False
-# except:
-#     register(
-#         print, f"Some tests require a test database. To initiate it, run:\n{DB_CMD}"
-#     )
-#     SERVICE_UNAVAILABLE = True
-SERVICE_UNAVAILABLE = False
-
-
 TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
 
 CONFIG_PATH = os.path.join(
@@ -40,6 +27,14 @@
     "data",
 )
 
+# try:
+#     BedBaseAgent(config=CONFIG_PATH)
+#     SERVICE_UNAVAILABLE = False
+# except Exception as _:
+#     SERVICE_UNAVAILABLE = True
+SERVICE_UNAVAILABLE = False
+
+
 if not SERVICE_UNAVAILABLE:
     agent = BedBaseAgent(config=CONFIG_PATH)
 
diff --git a/tests/test_bedfile.py b/tests/test_bedfile.py
index 07489c7..9875b78 100644
--- a/tests/test_bedfile.py
+++ b/tests/test_bedfile.py
@@ -204,13 +204,45 @@ def test_bed_delete_not_found(self, bbagent_obj):
         with pytest.raises(BEDFileNotFoundError):
             bbagent_obj.bed.delete("not_found")
 
-    @pytest.mark.skip("Skipped, not fully implemented")
-    def test_bed_update(self):
-        # agent = BedBaseAgent(config=config)
-        # ff = agent.bed.update("91b2754c8ff01769bacfc80e6923c46e", {"number_of_regions": 44})
-        # print(ff)
-        # assert ff != None
-        pass
+    def test_bed_update(self, bbagent_obj):
+
+        # TODO: has to be expanded
+        with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True):
+
+            bed_file = bbagent_obj.bed.get(BED_TEST_ID, full=True)
+            # assert bed_file.annotation.model_dump(exclude_defaults=True) == {}
+            assert bed_file.annotation.cell_line == ""
+
+            new_metadata = {
+                "cell_line": "K562",
+                "tissue": "blood",
+            }
+            bbagent_obj.bed.update(
+                identifier=BED_TEST_ID,
+                metadata=new_metadata,
+                upload_qdrant=False,
+                upload_pephub=False,
+                upload_s3=False,
+            )
+
+            new_bed_file = bbagent_obj.bed.get(BED_TEST_ID, full=True)
+
+            assert new_bed_file.annotation.cell_line == "K562"
+
+    def test_get_unprocessed(self, bbagent_obj):
+        with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True):
+            return_result = bbagent_obj.bed.get_unprocessed(limit=100, offset=0)
+
+            assert return_result.count == 1
+            assert return_result.results[0].id == BED_TEST_ID
+
+    def test_get_missing_plots(self, bbagent_obj):
+        with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True):
+            return_result = bbagent_obj.bed.get_missing_plots(
+                "tss_distance", limit=100, offset=0
+            )
+
+            assert return_result[0] == BED_TEST_ID
 
 
 @pytest.mark.skip("Skipped, because ML models and qdrant needed")
diff --git a/tests/utils.py b/tests/utils.py
index 7ceb072..20a6ecc 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -39,6 +39,7 @@ def get_example_dict() -> dict:
         "genome_alias": "hg38",
         "genome_digest": "2230c535660fb4774114bfa966a62f823fdb6d21acf138d4",
         "name": "random_name",
+        "processed": False,
     }
     return value
 
@@ -109,6 +110,7 @@ def __enter__(self):
             self._add_bedset_data()
 
     def __exit__(self, exc_type, exc_value, exc_traceback):
+        # If we want to keep data, and schema, comment out the following line
         self.db_engine.delete_schema()
         pass
 