diff --git a/.github/workflows/cli-coverage.yml b/.github/workflows/cli-coverage.yml index 60076ba..de3a3c0 100644 --- a/.github/workflows/cli-coverage.yml +++ b/.github/workflows/cli-coverage.yml @@ -44,7 +44,7 @@ jobs: - run: smokeshow upload htmlcov env: - SMOKESHOW_GITHUB_STATUS_DESCRIPTION: CLI Coverage {coverage-percentage} + SMOKESHOW_GITHUB_STATUS_DESCRIPTION: Coverage {coverage-percentage} SMOKESHOW_GITHUB_COVERAGE_THRESHOLD: 50 SMOKESHOW_GITHUB_CONTEXT: coverage SMOKESHOW_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/docs/changelog.md b/docs/changelog.md index b3f7276..d1f7d24 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,7 +2,16 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.7.3] -- 2023-02-05 +## [0.8.0] -- 2024-02-26 +- Fixed forking schema +- Improved forking efficiency [#129](https://github.com/pepkit/pepdbagent/issues/129) +- Added uploading project from dict +- Added get_config, get_samples, get_subsamples methods to project module [#128](https://github.com/pepkit/pepdbagent/issues/128) +- Fixed error handling in views API [#130](https://github.com/pepkit/pepdbagent/issues/130) +- Added no_fail to views API + + +## [0.7.3] -- 2024-02-08 - Fixed POP update - Improved error handling in views API - Added stats method to Namespace module @@ -10,18 +19,18 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Added coverage -## [0.7.2] -- 2023-02-02 +## [0.7.2] -- 2024-02-02 - Fixed Unique Constraint in the Views - Fixed update project pop method - Fixed bug in duplicating samples -## [0.7.1] -- 2023-01-22 +## [0.7.1] -- 2024-01-22 - Fixed bug in Stars annotation - SQL efficiency improvements - Added sort by date in stared projects -## [0.7.0] -- 2023-01-17 +## [0.7.0] -- 2024-01-17 - Added `pop` to project table and annotation model 
[#107](https://github.com/pepkit/pepdbagent/issues/107) - Added `forked_from` feature [#73](https://github.com/pepkit/pepdbagent/issues/73) - Switched to pydantic2 [#105](https://github.com/pepkit/pepdbagent/issues/105) diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index 4910b9e..777f190 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 +1 @@ -__version__ = "0.7.3" +__version__ = "0.8.0" diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py index 4b90fac..a9b0bda 100644 --- a/pepdbagent/exceptions.py +++ b/pepdbagent/exceptions.py @@ -92,3 +92,8 @@ class ViewAlreadyExistsError(PEPDatabaseAgentError): def __init__(self, msg=""): super().__init__(f"""View already in the project. {msg}""") + + +class NamespaceNotFoundError(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""Namespace does not exist. {msg}""") diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 4de4bd8..a0b2c45 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -1,10 +1,23 @@ # file with pydantic models from typing import List, Optional, Union, Dict from pydantic import BaseModel, Field, ConfigDict, field_validator +from peppy.const import CONFIG_KEY, SUBSAMPLE_RAW_LIST_KEY, SAMPLE_RAW_DICT_KEY from pepdbagent.const import DEFAULT_TAG +class ProjectDict(BaseModel): + """ + Project dict (raw) model + """ + + config: dict = Field(alias=CONFIG_KEY) + subsample_list: Optional[Union[list, None]] = Field(alias=SUBSAMPLE_RAW_LIST_KEY) + sample_dict: list = Field(alias=SAMPLE_RAW_DICT_KEY) + + model_config = ConfigDict(populate_by_name=True, extra="forbid") + + class AnnotationModel(BaseModel): """ Project Annotation model. 
All meta metadata diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 9616502..5af92db 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -8,6 +8,7 @@ from sqlalchemy.orm import Session from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET, PKG_NAME, DEFAULT_LIMIT_INFO +from pepdbagent.exceptions import NamespaceNotFoundError from pepdbagent.db_utils import Projects, BaseEngine from pepdbagent.models import ( Namespace, @@ -221,9 +222,9 @@ def stats(self, namespace: str = None, monthly: bool = False) -> NamespaceStats: :param monthly: if True, get statistics for the last 3 years monthly, else for the last 3 months daily. """ if monthly: - number_of_month = 3 - else: number_of_month = 12 * 3 + else: + number_of_month = 3 today_date = datetime.today().date() + timedelta(days=1) three_month_ago = today_date - timedelta(days=number_of_month * 30 + 1) statement_last_update = select(Projects.last_update_date).filter( @@ -240,6 +241,9 @@ def stats(self, namespace: str = None, monthly: bool = False) -> NamespaceStats: update_results = session.execute(statement_last_update).all() create_results = session.execute(statement_create_date).all() + if not update_results: + raise NamespaceNotFoundError(f"Namespace {namespace} not found in the database") + if monthly: year_month_str_submission = [ dt.submission_date.strftime("%Y-%m") for dt in create_results diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 1319b5a..afe5f20 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -8,12 +8,14 @@ from sqlalchemy.exc import IntegrityError, NoResultFound from sqlalchemy.orm import Session from sqlalchemy import Select +import numpy as np from peppy.const import ( SAMPLE_RAW_DICT_KEY, SUBSAMPLE_RAW_LIST_KEY, CONFIG_KEY, SAMPLE_TABLE_INDEX_KEY, + SAMPLE_NAME_ATTR, ) from pepdbagent.const import ( @@ -24,8 +26,12 @@ ) from pepdbagent.db_utils import Projects, 
Samples, Subsamples, BaseEngine -from pepdbagent.exceptions import ProjectNotFoundError, ProjectUniqueNameError -from pepdbagent.models import UpdateItems, UpdateModel +from pepdbagent.exceptions import ( + ProjectNotFoundError, + ProjectUniqueNameError, + PEPDatabaseAgentError, +) +from pepdbagent.models import UpdateItems, UpdateModel, ProjectDict from pepdbagent.utils import create_digest, registry_path_converter @@ -210,7 +216,7 @@ def delete_by_rp( def create( self, - project: peppy.Project, + project: Union[peppy.Project, dict], namespace: str, name: str = None, tag: str = DEFAULT_TAG, @@ -227,6 +233,13 @@ def create( update is set True) :param peppy.Project project: Project object that has to be uploaded to the DB + danger zone: + optionally, project can be a dictionary with PEP elements + ({ + _config: dict, + _sample_dict: list, + _subsample_list: list + }) :param namespace: namespace of the project (Default: 'other') :param name: name of the project (Default: name is taken from the project object) :param tag: tag (or version) of the project. @@ -239,9 +252,22 @@ def create( :param description: description of the project :return: None """ - proj_dict = project.to_dict(extended=True, orient="records") + if isinstance(project, peppy.Project): + proj_dict = project.to_dict(extended=True, orient="records") + elif isinstance(project, dict): + # verify if the dictionary has all necessary elements. + # samples should be always presented as list of dicts (orient="records")) + _LOGGER.warning( + f"Project {namespace}/{name}:{tag} is provided as dictionary. Project won't be validated." 
+ ) + proj_dict = ProjectDict(**project).model_dump(by_alias=True) + else: + raise PEPDatabaseAgentError( + "Project has to be peppy.Project object or dictionary with PEP elements" + ) + if not description: - description = project.description + description = proj_dict[CONFIG_KEY].get(DESCRIPTION_KEY) or "" proj_dict[CONFIG_KEY][DESCRIPTION_KEY] = description namespace = namespace.lower() @@ -255,7 +281,10 @@ def create( proj_dict[CONFIG_KEY][NAME_KEY] = proj_name proj_digest = create_digest(proj_dict) - number_of_samples = len(project.samples) + try: + number_of_samples = len(project.samples) + except AttributeError: + number_of_samples = len(proj_dict[SAMPLE_RAW_DICT_KEY]) if update_only: _LOGGER.info(f"Update_only argument is set True. Updating project {proj_name} ...") @@ -293,7 +322,9 @@ def create( self._add_samples_to_project( new_prj, proj_dict[SAMPLE_RAW_DICT_KEY], - sample_table_index=project.sample_table_index, + sample_table_index=proj_dict[CONFIG_KEY].get( + SAMPLE_TABLE_INDEX_KEY, SAMPLE_NAME_ATTR + ), ) if proj_dict[SUBSAMPLE_RAW_LIST_KEY]: @@ -833,6 +864,7 @@ def fork( namespace=original_namespace, name=original_name, tag=original_tag, + raw=True, ), namespace=fork_namespace, name=fork_name, @@ -857,6 +889,80 @@ def fork( fork_prj.forked_from_id = original_prj.id fork_prj.pop = original_prj.pop fork_prj.submission_date = original_prj.submission_date + fork_prj.pep_schema = original_prj.pep_schema + fork_prj.description = description or original_prj.description session.commit() return None + + def get_config(self, namespace: str, name: str, tag: str) -> Union[dict, None]: + """ + Get project configuration by providing namespace, name, and tag + + :param namespace: project namespace + :param name: project name + :param tag: project tag + :return: project configuration + """ + statement = select(Projects.config).where( + and_(Projects.namespace == namespace, Projects.name == name, Projects.tag == tag) + ) + with Session(self._sa_engine) as session: + result = 
session.execute(statement).one_or_none() + + if result: + return result[0] + return None + + def get_subsamples(self, namespace: str, name: str, tag: str) -> Union[list, None]: + """ + Get project subsamples by providing namespace, name, and tag + + :param namespace: project namespace + :param name: project name + :param tag: project tag + :return: list with project subsamples + """ + statement = self._create_select_statement(name, namespace, tag) + + with Session(self._sa_engine) as session: + + found_prj = session.scalar(statement) + + if found_prj: + _LOGGER.info(f"Project has been found: {found_prj.namespace}, {found_prj.name}") + subsample_dict = {} + if found_prj.subsamples_mapping: + for subsample in found_prj.subsamples_mapping: + if subsample.subsample_number not in subsample_dict.keys(): + subsample_dict[subsample.subsample_number] = [] + subsample_dict[subsample.subsample_number].append(subsample.subsample) + return list(subsample_dict.values()) + else: + return [] + else: + raise ProjectNotFoundError( + f"No project found for supplied input: '{namespace}/{name}:{tag}'. " + f"Did you supply a valid namespace and project?" + ) + + def get_samples(self, namespace: str, name: str, tag: str, raw: bool = True) -> list: + """ + Get project samples by providing namespace, name, and tag + + :param namespace: project namespace + :param name: project name + :param tag: project tag + :param raw: if True, retrieve unprocessed (raw) PEP dict. 
[Default: True] + + :return: list with project samples + """ + if raw: + return self.get(namespace=namespace, name=name, tag=tag, raw=True).get( + SAMPLE_RAW_DICT_KEY + ) + return ( + self.get(namespace=namespace, name=name, tag=tag, raw=False) + .sample_table.replace({np.nan: None}) + .to_dict(orient="records") + ) diff --git a/pepdbagent/modules/view.py b/pepdbagent/modules/view.py index ef4fc30..fb31a0f 100644 --- a/pepdbagent/modules/view.py +++ b/pepdbagent/modules/view.py @@ -137,6 +137,7 @@ def create( view_name: str, view_dict: Union[dict, CreateViewDictModel], description: str = None, + no_fail: bool = False, ) -> None: """ Create a view of the project in the database. @@ -151,6 +152,7 @@ def create( sample_list: List[str] # list of sample names } :param description: description of the view + :param no_fail: if True, skip samples that doesn't exist in the project retrun: None """ _LOGGER.debug(f"Creating view {view_name} with provided info: (view_dict: {view_dict})") @@ -185,11 +187,15 @@ def create( Samples.sample_name == sample_name, ) ) - sample_id = sa_session.execute(sample_statement).one()[0] - if not sample_id: + sample_id_tuple = sa_session.execute(sample_statement).one_or_none() + if sample_id_tuple: + sample_id = sample_id_tuple[0] + elif not sample_id_tuple and not no_fail: raise SampleNotFoundError( f"Sample {view_dict.project_namespace}/{view_dict.project_name}:{view_dict.project_tag}:{sample_name} does not exist" ) + else: + continue sa_session.add(ViewSampleAssociation(sample_id=sample_id, view=view)) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index e4b7e0e..d86dcfc 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -6,3 +6,4 @@ coloredlogs>=15.0.1 pytest-mock pydantic>=2.0 psycopg>=3.1.15 +numpy>=1.24.4 diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 13c854f..7fd7d66 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -5,6 +5,7 
@@ import peppy import pytest from sqlalchemy.exc import OperationalError +import numpy as np import pepdbagent from pepdbagent.exceptions import ( @@ -60,6 +61,16 @@ def test_create_project(self, initiate_empty_pepdb_con, list_of_available_peps): ) assert True + def test_create_project_from_dict(self, initiate_empty_pepdb_con, list_of_available_peps): + prj = peppy.Project(list_of_available_peps["namespace3"]["subtables"]) + initiate_empty_pepdb_con.project.create( + prj.to_dict(extended=True, orient="records"), + namespace="test", + name="imply", + overwrite=True, + ) + assert True + @pytest.mark.parametrize( "namespace, name", [ @@ -77,6 +88,78 @@ def test_get_project(self, initiate_pepdb_con, namespace, name): ff = peppy.Project(get_path_to_example_file(namespace, name)) assert kk == ff + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ], + ) + def test_get_config(self, initiate_pepdb_con, namespace, name): + description = "" + kk = initiate_pepdb_con.project.get_config( + namespace=namespace, + name=name, + tag="default", + ) + ff = peppy.Project(get_path_to_example_file(namespace, name)) + ff.description = description + ff.name = name + assert kk == ff.config + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace3", "subtables"], + ], + ) + def test_get_subsamples(self, initiate_pepdb_con, namespace, name): + prj_subtables = initiate_pepdb_con.project.get_subsamples( + namespace=namespace, + name=name, + tag="default", + ) + orgiginal_prj = peppy.Project(get_path_to_example_file(namespace, name)) + + assert ( + prj_subtables + == orgiginal_prj.to_dict(extended=True, orient="records")["_subsample_list"] + ) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace3", "subtables"], + ], + ) + def test_get_samples_raw(self, initiate_pepdb_con, namespace, name): + prj_samples = initiate_pepdb_con.project.get_samples( + namespace=namespace, name=name, tag="default", raw=True + ) + orgiginal_prj = 
peppy.Project(get_path_to_example_file(namespace, name)) + + assert ( + prj_samples == orgiginal_prj.to_dict(extended=True, orient="records")["_sample_dict"] + ) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace3", "subtables"], + ], + ) + def test_get_samples_processed(self, initiate_pepdb_con, namespace, name): + prj_samples = initiate_pepdb_con.project.get_samples( + namespace=namespace, + name=name, + tag="default", + raw=False, + ) + orgiginal_prj = peppy.Project(get_path_to_example_file(namespace, name)) + + assert prj_samples == orgiginal_prj.sample_table.replace({np.nan: None}).to_dict( + orient="records" + ) + @pytest.mark.parametrize( "namespace, name,tag", [ @@ -1151,6 +1234,50 @@ def test_create_view(self, initiate_pepdb_con, namespace, name, sample_name, vie assert len(view_project.samples) == 2 assert view_project != project + @pytest.mark.parametrize( + "namespace, name, sample_name, view_name", + [ + ["namespace1", "amendments1", "pig_0h", "view1"], + ], + ) + def test_create_view_with_incorrect_sample( + self, initiate_pepdb_con, namespace, name, sample_name, view_name + ): + with pytest.raises(SampleNotFoundError): + initiate_pepdb_con.view.create( + "view1", + { + "project_namespace": "namespace1", + "project_name": "amendments1", + "project_tag": "default", + "sample_list": ["pig_0h", "pig_1h", "pig_2h"], + }, + ) + + @pytest.mark.parametrize( + "namespace, name, sample_name, view_name", + [ + ["namespace1", "amendments1", "pig_0h", "view1"], + ], + ) + def test_create_view_with_incorrect_sample_no_fail( + self, initiate_pepdb_con, namespace, name, sample_name, view_name + ): + initiate_pepdb_con.view.create( + "view1", + { + "project_namespace": "namespace1", + "project_name": "amendments1", + "project_tag": "default", + "sample_list": ["pig_0h", "pig_1h", "pig_2h"], + }, + no_fail=True, + ) + project = initiate_pepdb_con.project.get(namespace, name) + view_project = initiate_pepdb_con.view.get(namespace, name, "default", 
view_name) + assert len(view_project.samples) == 2 + assert view_project != project + @pytest.mark.parametrize( "namespace, name, sample_name", [