From a0ef7ce2004d6ba4d11b635ee276212b3e2f27a5 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 23 Mar 2023 14:04:02 -0400 Subject: [PATCH 01/32] fixed const peppy dependencies --- pepdbagent/_version.py | 2 +- pepdbagent/utils.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index 493f741..260c070 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 +1 @@ -__version__ = "0.3.0" +__version__ = "0.3.1" diff --git a/pepdbagent/utils.py b/pepdbagent/utils.py index 32ec156..c859f96 100644 --- a/pepdbagent/utils.py +++ b/pepdbagent/utils.py @@ -4,6 +4,7 @@ from typing import Tuple, Union import ubiquerg from .exceptions import RegistryPathError +from peppy.const import SAMPLE_RAW_DICT_KEY def is_valid_registry_path(rpath: str) -> bool: @@ -47,7 +48,7 @@ def create_digest(project_dict: dict) -> str: """ sample_digest = md5( json.dumps( - project_dict["_sample_dict"], + project_dict[SAMPLE_RAW_DICT_KEY], separators=(",", ":"), ensure_ascii=False, allow_nan=False, From 4b2d310c2bac26e07a13cbaa85fd9e5925a78a94 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 23 Mar 2023 14:09:50 -0400 Subject: [PATCH 02/32] added change log --- docs/changelog.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 6c4573b..84c88d4 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -3,6 +3,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.3.1] -- 2023-03-23 +- Fixed bug with peppy const dependencies + + ## [0.3.0] -- 2023-01-19 - Restructured pepdbagent: From 1ea53707945309613360c824c2edb201eadfc195 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 23 Mar 2023 14:12:00 -0400 Subject: [PATCH 03/32] lint --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 5afaa73..98ad67e 100644 --- a/setup.py +++ b/setup.py @@ -16,6 +16,7 @@ # Additional keyword arguments for setup(). extra = {"install_requires": DEPENDENCIES} + # Additional files to include with package def get_static(name, condition=None): static = [ From 5867ed33c75d0bf5c4d99ad274f9b9bef30a582d Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 May 2023 22:06:19 -0400 Subject: [PATCH 04/32] first work on sqlalchemy --- pepdbagent/schema_initializer.py | 67 ++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 pepdbagent/schema_initializer.py diff --git a/pepdbagent/schema_initializer.py b/pepdbagent/schema_initializer.py new file mode 100644 index 0000000..c3fb9bf --- /dev/null +++ b/pepdbagent/schema_initializer.py @@ -0,0 +1,67 @@ +from sqlalchemy.orm import Mapped +from sqlalchemy.orm import mapped_column +from sqlalchemy import String, BigInteger +from sqlalchemy.ext.compiler import compiles +import datetime +from sqlalchemy.dialects.postgresql import JSONB +from typing import Optional, Any +from sqlalchemy import create_engine +from sqlalchemy import event + +from sqlalchemy.orm import DeclarativeBase, MappedAsDataclass +from sqlalchemy import PrimaryKeyConstraint, FetchedValue + +class BIGSERIAL(BigInteger): + pass + + +@compiles(BIGSERIAL, "postgresql") +def compile_bigserial_pg(type_, compiler, **kw): + return "BIGSERIAL" + + +class Base(MappedAsDataclass, DeclarativeBase): + type_annotation_map = { + dict[str, Any]: JSONB, + } + + +@event.listens_for(Base.metadata, 'after_create') +def receive_after_create(target, connection, tables, **kw): + "listen for the 'after_create' event" + if tables: + print('A table was created') + else: + print('A table was not created') + + +class Projects(Base): + __tablename__ = "projects" + + id: Mapped[int] = mapped_column(BIGSERIAL, server_default=FetchedValue()) + namespace: Mapped[str] = mapped_column(primary_key=True) + name: Mapped[str] = mapped_column(primary_key=True) + tag: Mapped[str] = mapped_column(primary_key=True) + digest: Mapped[str] = mapped_column(String(32)) + project_value: Mapped[dict[str, Any]] + private: Mapped[bool] + number_of_samples: Mapped[int] + submission_date: Mapped[datetime.datetime] + last_update_date: Mapped[datetime.datetime] + # schema: Mapped[Optional[str]] + + __table_args__ = ( + PrimaryKeyConstraint("namespace", "name", "tag", name="id"), + ) + + +def main(): + engine = create_engine('postgresql://postgres:docker@localhost:5432/pep-db', echo=True, future=True) + asd = Base.metadata.create_all(engine) + + + + +if __name__ == "__main__": + main() + From 47e128a2728a5741d9148c9ed94fb2a26e6af791 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 18 May 2023 15:17:57 -0400 Subject: [PATCH 05/32] added base sqlalchemy class --- pepdbagent/const.py | 3 + pepdbagent/db_utils.py | 171 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 pepdbagent/db_utils.py diff --git a/pepdbagent/const.py b/pepdbagent/const.py index 8057140..01cf9cf 100644 --- a/pepdbagent/const.py +++ b/pepdbagent/const.py @@ -34,3 +34,6 @@ DEFAULT_OFFSET = 0 DEFAULT_LIMIT = 100 + +# db_dialects +POSTGRES_DIALECT = "postgresql" diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py new file mode 100644 index 0000000..4229ec9 --- /dev/null +++ b/pepdbagent/db_utils.py @@ -0,0 +1,171 @@ +from sqlalchemy.engine import create_engine +from sqlalchemy.orm import Mapped, mapped_column +from sqlalchemy.orm import DeclarativeBase, MappedAsDataclass +from sqlalchemy.exc import ProgrammingError +from sqlalchemy.orm import Session +from sqlalchemy import PrimaryKeyConstraint, FetchedValue +from sqlalchemy import Table +from sqlalchemy import select + +from sqlalchemy import String, BigInteger +from sqlalchemy.dialects.postgresql import JSONB + +from sqlalchemy import event +from sqlalchemy.ext.compiler import compiles + +from typing import Optional, Any +import datetime +import logging + +from pepdbagent.const import POSTGRES_DIALECT +from exceptions import SchemaError + + +_LOGGER = logging.getLogger("pepdbagent") + + +class BIGSERIAL(BigInteger): + pass + + +@compiles(BIGSERIAL, "postgresql") +def compile_bigserial_pg(type_, compiler, **kw): + return "BIGSERIAL" + + +class Base(MappedAsDataclass, DeclarativeBase): + type_annotation_map = { + dict[str, Any]: JSONB, + } + + +@event.listens_for(Base.metadata, 'after_create') +def receive_after_create(target, connection, tables, **kw): + """ + listen for the 'after_create' event + """ + if tables: + _LOGGER.warning('A table was created') + print('A table was created') + else: + _LOGGER.info('A table was not created') + print('A table was not created') + + +class Projects(Base): + __tablename__ = "projects" + + id: Mapped[int] = mapped_column(BIGSERIAL, server_default=FetchedValue()) + namespace: Mapped[str] = mapped_column(primary_key=True) + name: Mapped[str] = mapped_column(primary_key=True) + tag: Mapped[str] = mapped_column(primary_key=True) + digest: Mapped[str] = mapped_column(String(32)) + project_value: Mapped[dict[str, Any]] + private: Mapped[bool] + number_of_samples: Mapped[int] + submission_date: Mapped[datetime.datetime] + last_update_date: Mapped[datetime.datetime] + # schema: Mapped[Optional[str]] + + __table_args__ = ( + PrimaryKeyConstraint("namespace", "name", "tag", name="id"), + ) + + +class BaseEngine(): + """ + A class with base methods, that are used in several classes. e.g. fetch_one or fetch_all + """ + + def __init__( + self, + *, + host: str = "localhost", + port: int = 5432, + database: str = "pep-db", + user: str = None, + password: str = None, + dialect: str = POSTGRES_DIALECT, + dsn: str = None, + echo: bool = False, + ): + """ + Initialize connection to the pep_db database. You can use The basic connection parameters + or libpq connection string. + :param host: database server address e.g., localhost or an IP address. + :param port: the port number that defaults to 5432 if it is not provided. + :param database: the name of the database that you want to connect. + :param user: the username used to authenticate. + :param password: password used to authenticate. + :param dsn: libpq connection string using the dsn parameter + (e.g. 'postgresql://user_name:password@host_name:port/db_name') + """ + if not dsn: + dsn = self._create_dsn_string(host=host, + port=port, + database=database, + user=user, + password=password, + dialect=dialect, ) + + self._engine = create_engine(dsn, echo=echo) + + def create_schema(self): + Base.metadata.create_all(self._engine) + + @property + def session(self): + return self._start_session() + + @property + def engine(self): + return self._engine + + def _start_session(self): + session = Session(self.engine) + try: + session.execute(select(Projects)).first() + except ProgrammingError: + raise SchemaError() + + return session + + + @staticmethod + def _create_dsn_string( + host: str = "localhost", + port: int = 5432, + database: str = "pep-db", + user: str = None, + password: str = None, + dialect: str = POSTGRES_DIALECT, + ) -> str: + """ + Using host, port, database, user, and password and dialect + + :param host: database server address e.g., localhost or an IP address. + :param port: the port number that defaults to 5432 if it is not provided. + :param database: the name of the database that you want to connect. + :param user: the username used to authenticate. + :param password: password used to authenticate. + :param dialect: DB dialect, specific implementation or variant of a database system. [Default: postgresql] + :return: sqlalchemy connection string + """ + return f"{dialect}://{user}:{password}@{host}:{port}/{database}" + + +def main(): + # engine = BaseEngine(dsn='postgresql://postgres:docker@localhost:5432/pep-db') + engine = BaseEngine(host="localhost", + port=5432, + database="pep-db", + user="postgres", + password="docker", + echo=True + ) + # engine.create_schema() + ff = engine.session + + +if __name__ == "__main__": + main() From b5aef4a2670c1a7263a1858e5756cd714f047bd6 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 19 May 2023 00:17:20 -0400 Subject: [PATCH 06/32] added sqlalchemy project --- manual_tests.py | 16 +- pepdbagent/db_utils.py | 88 ++++----- pepdbagent/modules/project.py | 329 ++++++++++++++++------------------ pepdbagent/pepdbagent.py | 34 ++-- pepdbagent/utils.py | 3 + 5 files changed, 237 insertions(+), 233 deletions(-) diff --git a/manual_tests.py b/manual_tests.py index 3b24e98..8429a54 100644 --- a/manual_tests.py +++ b/manual_tests.py @@ -5,21 +5,25 @@ from peppy import Project -con = pepdbagent.PEPDatabaseAgent(dsn="postgresql://postgres:docker@localhost:5432/pep-db") +con = pepdbagent.PEPDatabaseAgent(dsn="postgresql://postgres:docker@localhost:5432/pep-db", echo=True) ############### -# Upload +# # Upload prj = peppy.Project( "/home/bnt4me/virginia/repos/pepdbagent/sample_pep/basic/project_config.yaml" ) con.project.create(project=prj, namespace="Khoroshevskyi", name="dupa", tag="test1", overwrite=True) -# Project +con.project.exists(namespace="Khoroshevskyi", name="dupa", tag="test1") +con.project.update(update_dict={"is_private": False}, namespace="Khoroshevskyi", name="dupa", tag="test1") +# # Project + +# prj_dow = con.project.get(namespace="Khoroshevskyi", name="dupa", tag="test1") -prj_dow = con.project.get(namespace="Khoroshevskyi", name="dupa", tag="test1") +exit(1) print(prj_dow.name) -prj_raw = con.project.get(namespace="Khoroshevskyi", name="dupa", tag="test1", raw=True) +prj_raw = con.project.get(namespace="Khoroshfevskyi", name="dupa", tag="test1", raw=True) print(prj_raw) @@ -27,7 +31,7 @@ ############### # Annotation -dd_list = con.annotation.get_by_rp( +dd_list = con.adialectnnotation.get_by_rp( [ "Khoroshevskyi/gse_yaml:default", "Khoroshevskyi/gse_yaml:default", diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 4229ec9..ed09adf 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -12,14 +12,14 @@ from sqlalchemy import event from sqlalchemy.ext.compiler import compiles +from sqlalchemy.engine import URL from typing import Optional, Any import datetime import logging from pepdbagent.const import POSTGRES_DIALECT -from exceptions import SchemaError - +from pepdbagent.exceptions import SchemaError _LOGGER = logging.getLogger("pepdbagent") @@ -39,17 +39,17 @@ class Base(MappedAsDataclass, DeclarativeBase): } -@event.listens_for(Base.metadata, 'after_create') +@event.listens_for(Base.metadata, "after_create") def receive_after_create(target, connection, tables, **kw): """ listen for the 'after_create' event """ if tables: - _LOGGER.warning('A table was created') - print('A table was created') + _LOGGER.warning("A table was created") + print("A table was created") else: - _LOGGER.info('A table was not created') - print('A table was not created') + _LOGGER.info("A table was not created") + print("A table was not created") class Projects(Base): @@ -67,27 +67,25 @@ class Projects(Base): last_update_date: Mapped[datetime.datetime] # schema: Mapped[Optional[str]] - __table_args__ = ( - PrimaryKeyConstraint("namespace", "name", "tag", name="id"), - ) + __table_args__ = (PrimaryKeyConstraint("namespace", "name", "tag", name="id"),) -class BaseEngine(): +class BaseEngine: """ A class with base methods, that are used in several classes. e.g. fetch_one or fetch_all """ def __init__( - self, - *, - host: str = "localhost", - port: int = 5432, - database: str = "pep-db", - user: str = None, - password: str = None, - dialect: str = POSTGRES_DIALECT, - dsn: str = None, - echo: bool = False, + self, + *, + host: str = "localhost", + port: int = 5432, + database: str = "pep-db", + user: str = None, + password: str = None, + drivername: str = POSTGRES_DIALECT, + dsn: str = None, + echo: bool = False, ): """ Initialize connection to the pep_db database. You can use The basic connection parameters @@ -101,15 +99,23 @@ def __init__( (e.g. 'postgresql://user_name:password@host_name:port/db_name') """ if not dsn: - dsn = self._create_dsn_string(host=host, - port=port, - database=database, - user=user, - password=password, - dialect=dialect, ) + dsn = URL.create( + host=host, + port=port, + database=database, + username=user, + password=password, + drivername=drivername, + ) self._engine = create_engine(dsn, echo=echo) + session = Session(self._engine) + try: + session.execute(select(Projects)).first() + except ProgrammingError: + raise SchemaError() + def create_schema(self): Base.metadata.create_all(self._engine) @@ -130,15 +136,14 @@ def _start_session(self): return session - @staticmethod def _create_dsn_string( - host: str = "localhost", - port: int = 5432, - database: str = "pep-db", - user: str = None, - password: str = None, - dialect: str = POSTGRES_DIALECT, + host: str = "localhost", + port: int = 5432, + database: str = "pep-db", + user: str = None, + password: str = None, + dialect: str = POSTGRES_DIALECT, ) -> str: """ Using host, port, database, user, and password and dialect @@ -156,13 +161,14 @@ def _create_dsn_string( def main(): # engine = BaseEngine(dsn='postgresql://postgres:docker@localhost:5432/pep-db') - engine = BaseEngine(host="localhost", - port=5432, - database="pep-db", - user="postgres", - password="docker", - echo=True - ) + engine = BaseEngine( + host="localhost", + port=5432, + database="pep-db", + user="postgres", + password="docker", + echo=True, + ) # engine.create_schema() ff = engine.session diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 15083bf..ad5bf70 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -3,7 +3,12 @@ from typing import Union, Tuple import logging import peppy -from psycopg2.errors import NotNullViolation, UniqueViolation +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session +from sqlalchemy import Engine +from sqlalchemy import insert, select, delete, update +from sqlalchemy import and_, or_ +from pepdbagent.db_utils import Projects from pepdbagent.models import ( UpdateModel, @@ -27,11 +32,11 @@ class PEPDatabaseProject: While using this class, user can retrieve projects from database """ - def __init__(self, con: BaseConnection): + def __init__(self, engine: Engine): """ - :param con: Connection to db represented by BaseConnection class object + :param engine: Connection to db represented by sqlalchemy engine """ - self.con = con + self._sa_engine = engine def get( self, @@ -42,6 +47,7 @@ def get( ) -> Union[peppy.Project, dict, None]: """ Retrieve project from database by specifying namespace, name and tag + :param namespace: namespace of the project :param name: name of the project (Default: name is taken from the project object) :param tag: tag (or version) of the project. @@ -58,19 +64,28 @@ def get( if tag is None: tag = DEFAULT_TAG - sql_q = f""" - select {ID_COL}, {PROJ_COL}, {PRIVATE_COL} from {DB_TABLE_NAME} - """ - - sql_q = ( - f""" {sql_q} where {NAME_COL}=%s and {NAMESPACE_COL}=%s and {TAG_COL}=%s;""" - ) - found_prj = self.con.run_sql_fetchone(sql_q, name, namespace, tag) + with Session(self._sa_engine) as session: + found_prj = session.execute( + select( + Projects.namespace, + Projects.name, + Projects.project_value, + Projects.private, + ).where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ) + ) + ).one() if found_prj: - _LOGGER.info(f"Project has been found: {found_prj[0]}") - project_value = found_prj[1] - is_private = found_prj[2] + _LOGGER.info( + f"Project has been found: {found_prj.namespace}, {found_prj.name}" + ) + project_value = found_prj.project_value + is_private = found_prj.private if raw: return project_value else: @@ -91,6 +106,7 @@ def get_by_rp( ) -> Union[peppy.Project, dict, None]: """ Retrieve project from database by specifying project registry_path + :param registry_path: project registry_path [e.g. namespace/name:tag] :param raw: retrieve unprocessed (raw) PEP dict. :return: peppy.Project object with found project or dict with unprocessed @@ -113,29 +129,30 @@ def delete( ) -> None: """ Delete record from database + :param namespace: Namespace :param name: Name :param tag: Tag :return: None """ - cursor = self.con.pg_connection.cursor() - sql_delete = f"""DELETE FROM {DB_TABLE_NAME} - WHERE {NAMESPACE_COL} = %s and {NAME_COL} = %s and {TAG_COL} = %s;""" + with self._sa_engine as engine: + engine.execute( + delete(Projects).where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ) + ) + ) + + _LOGGER.info(f"Project '{namespace}/{name}:{tag} was successfully deleted'") if not self.exists(namespace=namespace, name=name, tag=tag): raise ProjectNotFoundError( f"Can't delete unexciting project: '{namespace}/{name}:{tag}'." ) - try: - cursor.execute(sql_delete, (namespace, name, tag)) - _LOGGER.info(f"Project '{namespace}/{name}:{tag} was successfully deleted'") - except Exception as err: - _LOGGER.error(f"Error while deleting project. Message: {err}") - finally: - cursor.close() - return None - def delete_by_rp( self, registry_path: str, @@ -153,7 +170,7 @@ def create( project: peppy.Project, namespace: str, name: str = None, - tag: str = None, + tag: str = DEFAULT_TAG, is_private: bool = False, overwrite: bool = False, update_only: bool = False, @@ -172,16 +189,16 @@ def create( :param update_only: if project exists overwrite it, otherwise do nothing. [Default: False] :return: None """ - cursor = self.con.pg_connection.cursor() - if tag is None: - tag = DEFAULT_TAG - proj_dict = project.to_dict(extended=True) if name: proj_name = name - else: + elif proj_dict["name"]: proj_name = proj_dict["name"] + else: + raise ValueError( + f"Name of the project wasn't provided. Project will not be uploaded." + ) proj_dict["name"] = name @@ -201,42 +218,29 @@ def create( tag=tag, project_digest=proj_digest, number_of_samples=number_of_samples, + private=is_private, ) return None else: try: _LOGGER.info(f"Uploading {namespace}/{proj_name}:{tag} project...") - sql_base = f"""INSERT INTO {DB_TABLE_NAME} - ({NAMESPACE_COL}, {NAME_COL}, {TAG_COL}, {DIGEST_COL}, {PROJ_COL}, {N_SAMPLES_COL}, - {PRIVATE_COL}, {SUBMISSION_DATE_COL}, {LAST_UPDATE_DATE_COL}) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) - RETURNING {ID_COL};""" - - cursor.execute( - sql_base, - ( - namespace, - proj_name, - tag, - proj_digest, - proj_dict, - number_of_samples, - is_private, - datetime.datetime.now(), - datetime.datetime.now(), - ), - ) - proj_id = cursor.fetchone()[0] - - self.con.commit_to_database() - cursor.close() - _LOGGER.info( - f"Project: '{namespace}/{proj_name}:{tag}' was successfully uploaded." - ) - return None + with self._sa_engine.begin() as engine: + engine.execute( + insert(Projects).values( + namespace=namespace, + name=proj_name, + tag=tag, + digest=proj_digest, + project_value=proj_dict, + number_of_samples=number_of_samples, + private=is_private, + submission_date=datetime.datetime.now(), + last_update_date=datetime.datetime.now(), + ) + ) - except UniqueViolation: + except IntegrityError: if overwrite: self._overwrite( project_dict=proj_dict, @@ -245,8 +249,10 @@ def create( tag=tag, project_digest=proj_digest, number_of_samples=number_of_samples, + private=is_private, ) return None + else: raise ProjectUniqueNameError( f"Namespace, name and tag already exists. Project won't be " @@ -254,11 +260,6 @@ def create( f" (project will be overwritten), or change tag!" ) - except NotNullViolation as err: - raise ValueError( - f"Name of the project wasn't provided. Project will not be uploaded. Error: {err}" - ) - def _overwrite( self, project_dict: json, @@ -267,6 +268,7 @@ def _overwrite( tag: str, project_digest: str, number_of_samples: int, + private: bool = False, ) -> None: """ Update existing project by providing all necessary information. @@ -276,31 +278,35 @@ def _overwrite( :param tag: project tag :param project_digest: project digest :param number_of_samples: number of samples in project + :param private: boolean value if the project should be visible just for user that creates it. :return: None """ - - cursor = self.con.pg_connection.cursor() - if self.exists(namespace=namespace, name=proj_name, tag=tag): _LOGGER.info(f"Updating {proj_name} project...") - sql = f"""UPDATE {DB_TABLE_NAME} - SET {DIGEST_COL} = %s, {PROJ_COL}= %s, {N_SAMPLES_COL}= %s, {LAST_UPDATE_DATE_COL} = %s - WHERE {NAMESPACE_COL} = %s and {NAME_COL} = %s and {TAG_COL} = %s;""" - cursor.execute( - sql, - ( - project_digest, - project_dict, - number_of_samples, - datetime.datetime.now(), - namespace, - proj_name, - tag, - ), - ) - self.con.commit_to_database() + with self._sa_engine.begin() as engine: + engine.execute( + update(Projects) + .values( + namespace=namespace, + name=proj_name, + tag=tag, + digest=project_digest, + project_value=project_dict, + number_of_samples=number_of_samples, + private=private, + last_update_date=datetime.datetime.now(), + ) + .where( + and_( + Projects.namespace == namespace, + Projects.name == proj_name, + Projects.tag == tag, + ) + ) + ) + _LOGGER.info( - f"Project '{namespace}/{proj_name}:{tag}' has been updated successfully!" + f"Project '{namespace}/{proj_name}:{tag}' has been successfully updated!" ) return None @@ -314,7 +320,7 @@ def update( update_dict: Union[dict, UpdateItems], namespace: str, name: str, - tag: str, + tag: str = DEFAULT_TAG, ) -> None: """ Update partial parts of the record in db @@ -331,93 +337,75 @@ def update( :param tag: project tag :return: None """ - cursor = self.con.pg_connection.cursor() - - if isinstance(update_dict, UpdateItems): - update_values = update_dict - else: - update_values = UpdateItems(**update_dict) - if self.exists(namespace=namespace, name=name, tag=tag): - update_final = UpdateModel() - - if update_values.project_value is not None: - update_final = UpdateModel( - project_value=update_values.project_value.to_dict(extended=True), - name=update_values.project_value.name, - digest=create_digest( - update_values.project_value.to_dict(extended=True) - ), - last_update_date=datetime.datetime.now(), - number_of_samples=len(update_values.project_value.samples), - ) + if isinstance(update_dict, UpdateItems): + update_values = update_dict + else: + update_values = UpdateItems(**update_dict) - if update_values.tag is not None: - update_final = UpdateModel( - tag=update_values.tag, **update_final.dict(exclude_unset=True) - ) + update_values = self.__create_update_dict(update_values) - if update_values.is_private is not None: - update_final = UpdateModel( - is_private=update_values.is_private, - **update_final.dict(exclude_unset=True), + update_stmt = ( + update(Projects) + .where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ) ) + .values(update_values) + ) - if update_values.name is not None: - update_final = UpdateModel( - name=update_values.name, **update_final.dict(exclude_unset=True) - ) + with self._sa_engine.begin() as engine: + engine.execute(update_stmt) - set_sql, set_values = self.__create_update_set(update_final) - sql = f"""UPDATE {DB_TABLE_NAME} - {set_sql} - WHERE {NAMESPACE_COL} = %s and {NAME_COL} = %s and {TAG_COL} = %s;""" - _LOGGER.debug("Updating items...") - cursor.execute( - sql, - (*set_values, namespace, name, tag), - ) - _LOGGER.info(f"Record '{namespace}/{name}:{tag}' was successfully updated!") - self.con.commit_to_database() + return None else: raise ProjectNotFoundError("No items will be updated!") - return None - @staticmethod - def __create_update_set(update_info: UpdateModel) -> Tuple[str, tuple]: + def __create_update_dict(update_values: UpdateItems) -> dict: """ - Create sql SET string by passing UpdateModel that later is converted to dict - :param update_info: UpdateModel (similar to database model) - :return: {sql_string (contains db keys) and updating values} + + :return: """ - _LOGGER.debug("Creating SET SQL string to update project") - sql_string = f"""SET """ - sql_values = [] - - first = True - for key, val in update_info.dict(exclude_none=True).items(): - if first: - sql_string = "".join([sql_string, f"{key} = %s"]) - first = False - else: - sql_string = ", ".join([sql_string, f"{key} = %s"]) + update_final = UpdateModel() + + if update_values.project_value is not None: + update_final = UpdateModel( + project_value=update_values.project_value.to_dict(extended=True), + name=update_values.project_value.name, + digest=create_digest( + update_values.project_value.to_dict(extended=True) + ), + last_update_date=datetime.datetime.now(), + number_of_samples=len(update_values.project_value.samples), + ) - if isinstance(val, dict): - input_val = json.dumps(val) - else: - input_val = val + if update_values.tag is not None: + update_final = UpdateModel( + tag=update_values.tag, **update_final.dict(exclude_unset=True) + ) - sql_values.append(input_val) + if update_values.is_private is not None: + update_final = UpdateModel( + is_private=update_values.is_private, + **update_final.dict(exclude_unset=True), + ) - return sql_string, tuple(sql_values) + if update_values.name is not None: + update_final = UpdateModel( + name=update_values.name, **update_final.dict(exclude_unset=True) + ) + return update_final.dict(exclude_unset=True) def exists( self, - namespace: str = None, - name: str = None, - tag: str = None, + namespace: str, + name: str, + tag: str = DEFAULT_TAG, ) -> bool: """ Check if project exists in the database. @@ -426,22 +414,19 @@ def exists( :param tag: project tag :return: Returning True if project exist """ - if namespace is None: - namespace = DEFAULT_NAMESPACE - - if tag is None: - tag = DEFAULT_TAG - - if name is None: - _LOGGER.error(f"Name is not specified") - return False - - sql = f"""SELECT {ID_COL} from {DB_TABLE_NAME} - WHERE {NAMESPACE_COL} = %s AND - {NAME_COL} = %s AND - {TAG_COL} = %s;""" - - if self.con.run_sql_fetchone(sql, namespace, name, tag): + with Session(self._sa_engine) as session: + found_prj = session.execute( + select( + Projects.id, + ).where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ) + ) + ).all() + if len(found_prj) > 0: return True else: return False diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index a7c1d96..a3f5a46 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -1,7 +1,8 @@ -from pepdbagent.base_connection import BaseConnection +from pepdbagent.db_utils import BaseEngine from pepdbagent.modules.annotation import PEPDatabaseAnnotation from pepdbagent.modules.project import PEPDatabaseProject from pepdbagent.modules.namespace import PEPDatabaseNamespace +from pepdbagent.const import POSTGRES_DIALECT class PEPDatabaseAgent(object): @@ -12,7 +13,9 @@ def __init__( database="pep-db", user=None, password=None, + drivername=POSTGRES_DIALECT, dsn=None, + echo=False, ): """ Initialize connection to the pep_db database. You can use The basic connection parameters @@ -26,19 +29,22 @@ def __init__( (e.g. "localhost://username:password@pdp_db:5432") """ - con = BaseConnection( + session = BaseEngine( host=host, port=port, database=database, user=user, password=password, + drivername=drivername, dsn=dsn, - ) - self.__con = con + echo=echo, + ).engine - self.__project = PEPDatabaseProject(con) - self.__annotation = PEPDatabaseAnnotation(con) - self.__namespace = PEPDatabaseNamespace(con) + self.__con = session + + self.__project = PEPDatabaseProject(session) + # self.__annotation = PEPDatabaseAnnotation(con) + # self.__namespace = PEPDatabaseNamespace(con) self.__db_name = database @@ -46,13 +52,13 @@ def __init__( def project(self): return self.__project - @property - def annotation(self): - return self.__annotation - - @property - def namespace(self): - return self.__namespace + # @property + # def annotation(self): + # return self.__annotation + # + # @property + # def namespace(self): + # return self.__namespace def __str__(self): return f"Connection to the database: '{self.__db_name}' is set!" diff --git a/pepdbagent/utils.py b/pepdbagent/utils.py index c859f96..763196d 100644 --- a/pepdbagent/utils.py +++ b/pepdbagent/utils.py @@ -43,6 +43,7 @@ def all_elements_are_strings(iterable: Iterable) -> bool: def create_digest(project_dict: dict) -> str: """ Create digest for PEP project + :param project_dict: project dict :return: digest string """ @@ -61,6 +62,7 @@ def create_digest(project_dict: dict) -> str: def registry_path_converter(registry_path: str) -> Tuple[str, str, str]: """ Convert registry path to namespace, name, tag + :param registry_path: registry path that has structure: "namespace/name:tag" :return: tuple(namespace, name, tag) """ @@ -78,6 +80,7 @@ def tuple_converter(value: Union[tuple, list, str, None]) -> tuple: """ Convert string list or tuple to tuple. # is used to create admin tuple. + :param value: Any value that has to be converted to tuple :return: tuple of strings """ From bd49bad42c8f226cc0c9947b5c28bd55658e31f0 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 25 May 2023 13:39:43 -0400 Subject: [PATCH 07/32] added sqlalchemy namespace and annotation --- pepdbagent/db_utils.py | 50 ++++-- pepdbagent/modules/annotation.py | 261 +++++++++++++++++-------------- pepdbagent/modules/namespace.py | 90 +++++------ pepdbagent/modules/project.py | 26 +-- pepdbagent/pepdbagent.py | 32 ++-- 5 files changed, 255 insertions(+), 204 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index ed09adf..6a75746 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -4,7 +4,7 @@ from sqlalchemy.exc import ProgrammingError from sqlalchemy.orm import Session from sqlalchemy import PrimaryKeyConstraint, FetchedValue -from sqlalchemy import Table +from sqlalchemy import Select, Result from sqlalchemy import select from sqlalchemy import String, BigInteger @@ -33,10 +33,13 @@ def compile_bigserial_pg(type_, compiler, **kw): return "BIGSERIAL" -class Base(MappedAsDataclass, DeclarativeBase): - type_annotation_map = { - dict[str, Any]: JSONB, - } +@compiles(JSONB, "postgresql") +def compile_jsonb_pg(type_, compiler, **kw): + return "JSONB" + + +class Base(DeclarativeBase): + pass @event.listens_for(Base.metadata, "after_create") @@ -60,7 +63,7 @@ class Projects(Base): name: Mapped[str] = mapped_column(primary_key=True) tag: Mapped[str] = mapped_column(primary_key=True) digest: Mapped[str] = mapped_column(String(32)) - project_value: Mapped[dict[str, Any]] + project_value: Mapped[dict] = mapped_column(JSONB, server_default=FetchedValue()) private: Mapped[bool] number_of_samples: Mapped[int] submission_date: Mapped[datetime.datetime] @@ -95,6 +98,7 @@ def __init__( :param database: the name of the database that you want to connect. :param user: the username used to authenticate. :param password: password used to authenticate. + :param drivername: driver used in :param dsn: libpq connection string using the dsn parameter (e.g. 'postgresql://user_name:password@host_name:port/db_name') """ @@ -109,18 +113,43 @@ def __init__( ) self._engine = create_engine(dsn, echo=echo) + self.create_schema(self._engine) session = Session(self._engine) try: - session.execute(select(Projects)).first() + session.execute(select(Projects).limit(1)).first() except ProgrammingError: raise SchemaError() - def create_schema(self): - Base.metadata.create_all(self._engine) + def create_schema(self, engine=None): + """ + Create sql schema in the database. + + :param engine: sqlalchemy engine [Default: None] + :return: None + """ + if not engine: + engine = self._engine + Base.metadata.create_all(engine) + return None + + def session_execute(self, statement: Select) -> Result: + """ + Execute statement using sqlalchemy statement + + :param statement: SQL query or a SQL expression that is constructed using + SQLAlchemy's SQL expression language + :return: query result represented with declarative base + """ + with Session(self._engine) as session: + query_result = session.execute(statement) + return query_result @property def session(self): + """ + :return: started sqlalchemy session + """ return self._start_session() @property @@ -160,7 +189,6 @@ def _create_dsn_string( def main(): - # engine = BaseEngine(dsn='postgresql://postgres:docker@localhost:5432/pep-db') engine = BaseEngine( host="localhost", port=5432, @@ -169,7 +197,7 @@ def main(): password="docker", echo=True, ) - # engine.create_schema() + engine.create_schema() ff = engine.session diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index f23dd31..0790329 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -1,24 +1,19 @@ from typing import Union, List import logging -from pepdbagent.base_connection import BaseConnection +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session +from sqlalchemy import Engine +from sqlalchemy import insert, select, delete, update, func +from sqlalchemy import and_, or_ + +from pepdbagent.db_utils import Projects from pepdbagent.const import ( DEFAULT_LIMIT, DEFAULT_OFFSET, DEFAULT_TAG, - NAMESPACE_COL, - NAME_COL, - TAG_COL, - PRIVATE_COL, - PROJ_COL, - N_SAMPLES_COL, - SUBMISSION_DATE_COL, - LAST_UPDATE_DATE_COL, - DIGEST_COL, - DB_TABLE_NAME, ) from pepdbagent.utils import tuple_converter, registry_path_converter - from pepdbagent.models import AnnotationModel, AnnotationList from pepdbagent.exceptions import RegistryPathError, ProjectNotFoundError @@ -32,11 +27,11 @@ class PEPDatabaseAnnotation: While using this class, user can retrieve all necessary metadata about PEPs """ - def __init__(self, con: BaseConnection): + def __init__(self, engine: Engine): """ - :param con: Connection to db represented by BaseConnection class object + :param engine: Connection to db represented by sqlalchemy engine """ - self.con = con + self._sa_engine = engine def get( self, @@ -143,7 +138,7 @@ def _get_single_annotation( self, namespace: str, name: str, - tag: str = None, + tag: str = DEFAULT_TAG, admin: Union[List[str], str] = None, ) -> Union[AnnotationModel, None]: """ @@ -156,39 +151,43 @@ def _get_single_annotation( """ _LOGGER.info(f"Getting annotation of the project: '{namespace}/{name}:{tag}'") admin_tuple = tuple_converter(admin) - sql_q = f""" - select - {NAMESPACE_COL}, - {NAME_COL}, - {TAG_COL}, - {PRIVATE_COL}, - {PROJ_COL}->>'description', - {N_SAMPLES_COL}, - {SUBMISSION_DATE_COL}, - {LAST_UPDATE_DATE_COL}, - {DIGEST_COL} - from {DB_TABLE_NAME} - """ - if tag is None: - tag = DEFAULT_TAG - - sql_q = f""" {sql_q} where {NAME_COL}=%s and {NAMESPACE_COL}=%s and {TAG_COL}=%s - and ({PRIVATE_COL} is %s or {NAMESPACE_COL} in %s );""" - found_prj = self.con.run_sql_fetchone( - sql_q, name, namespace, tag, False, admin_tuple + query = select( + Projects.namespace, + Projects.name, + Projects.tag, + Projects.private, + Projects.project_value["description"].astext.label("description"), + Projects.number_of_samples, + Projects.submission_date, + Projects.last_update_date, + Projects.digest, + ).where( + and_( + Projects.name == name, + Projects.namespace == namespace, + Projects.tag == tag, + or_( + Projects.namespace.in_(admin_tuple), + Projects.private.is_(False), + ), + ) ) - if len(found_prj) > 0: + + with Session(self._sa_engine) as session: + query_result = session.execute(query).first() + + if len(query_result) > 0: annot = AnnotationModel( - namespace=found_prj[0], - name=found_prj[1], - tag=found_prj[2], - is_private=found_prj[3], - description=found_prj[4], - number_of_samples=found_prj[5], - submission_date=str(found_prj[6]), - last_update_date=str(found_prj[7]), - digest=found_prj[8], + namespace=query_result.namespace, + name=query_result.name, + tag=query_result.tag, + is_private=query_result.private, + description=query_result.description, + number_of_samples=query_result.number_of_samples, + submission_date=str(query_result.submission_date), + last_update_date=str(query_result.last_update_date), + digest=query_result.digest, ) _LOGGER.info( f"Annotation of the project '{namespace}/{name}:{tag}' has been found!" @@ -212,42 +211,38 @@ def _count_projects( :param admin: string or list of admins [e.g. "Khoroshevskyi", or ["doc_adin","Khoroshevskyi"]] :return: number of found project in specified namespace """ + if admin is None: + admin = [] + statement = select(func.count()).select_from(Projects) if search_str: - search_str = f"%%{search_str}%%" - search_sql_values = ( - search_str, - search_str, - search_str, + sql_search_str = f"%{search_str}%" + search_query = or_( + Projects.name.ilike(sql_search_str), + Projects.name.ilike(sql_search_str), ) - search_sql = f"""({NAME_COL} ILIKE %s or ({PROJ_COL}->>'description') ILIKE %s or {TAG_COL} ILIKE %s) and""" - else: - search_sql_values = tuple() - search_sql = "" - admin_tuple = tuple_converter(admin) - if namespace: - and_namespace_sql = f"""AND {NAMESPACE_COL} = %s""" - namespace = (namespace,) - else: - and_namespace_sql = "" - namespace = tuple() - count_sql = f""" - select count(*) - from {DB_TABLE_NAME} where - {search_sql} - ({PRIVATE_COL} is %s or {NAMESPACE_COL} in %s ) {and_namespace_sql};""" - result = self.con.run_sql_fetchall( - count_sql, - *search_sql_values, - False, - admin_tuple, - *namespace, + if self.get_project_number_in_namespace(namespace=namespace, admin=admin) < 1000: + search_query = or_( + search_query, + Projects.project_value["description"].astext.ilike(sql_search_str), + ) + + statement = statement.where( + search_query + ) + if namespace: + statement = statement.where(Projects.namespace == namespace) + statement = statement.where( + or_(Projects.private.is_(False), Projects.namespace.in_(admin)) ) + + with Session(self._sa_engine) as session: + result = session.execute(statement).first() + try: - number_of_prj = result[0][0] + return result[0] except IndexError: - number_of_prj = 0 - return number_of_prj + return 0 def _get_projects( self, @@ -269,59 +264,89 @@ def _get_projects( _LOGGER.info( f"Running annotation search: (namespace: {namespace}, query: {search_str}." ) + + if admin is None: + admin = [] + statement = select( + Projects.namespace, + Projects.name, + Projects.tag, + Projects.private, + Projects.project_value["description"].astext.label("description"), + Projects.number_of_samples, + Projects.submission_date, + Projects.last_update_date, + Projects.digest, + ).select_from(Projects) if search_str: - search_str = f"%%{search_str}%%" - search_sql_values = ( - search_str, - search_str, - search_str, + sql_search_str = f"%{search_str}%" + search_query = or_( + Projects.name.ilike(sql_search_str), + Projects.name.ilike(sql_search_str), ) - search_sql = f"""({NAME_COL} ILIKE %s or ({PROJ_COL}->>'description') ILIKE %s or {TAG_COL} ILIKE %s) and""" - else: - search_sql_values = tuple() - search_sql = "" - admin_tuple = tuple_converter(admin) + if self.get_project_number_in_namespace(namespace=namespace, admin=admin) < 1000: + search_query = or_( + search_query, + Projects.project_value["description"].astext.ilike(sql_search_str), + ) + + statement = statement.where( + search_query + ) if namespace: - and_namespace_sql = f"""AND {NAMESPACE_COL} = %s""" - namespace = (namespace,) - else: - and_namespace_sql = "" - namespace = tuple() + statement = statement.where(Projects.namespace == namespace) - count_sql = f""" - select {NAMESPACE_COL}, {NAME_COL}, {TAG_COL}, {N_SAMPLES_COL}, - ({PROJ_COL}->>'description'), {DIGEST_COL}, {PRIVATE_COL}, - {SUBMISSION_DATE_COL}, {LAST_UPDATE_DATE_COL} - from {DB_TABLE_NAME} where - {search_sql} - ({PRIVATE_COL} is %s or {NAMESPACE_COL} in %s ) {and_namespace_sql} - LIMIT %s OFFSET %s; - """ - results = self.con.run_sql_fetchall( - count_sql, - *search_sql_values, - False, - admin_tuple, - *namespace, - limit, - offset, + statement = statement.where( + or_(Projects.private.is_(False), Projects.namespace.in_(admin)) ) + + with Session(self._sa_engine) as session: + query_results = session.execute(statement.limit(limit).offset(offset)).all() + results_list = [] - for res in results: + for result in query_results: results_list.append( AnnotationModel( - namespace=res[0], - name=res[1], - tag=res[2], - number_of_samples=res[3], - description=res[4], - digest=res[5], - is_private=res[6], - last_update_date=str(res[8]), - submission_date=str(res[7]), + namespace=result.namespace, + name=result.name, + tag=result.tag, + is_private=result.private, + description=result.description, + number_of_samples=result.number_of_samples, + submission_date=str(result.submission_date), + last_update_date=str(result.last_update_date), + digest=result.digest, ) ) return results_list + + def get_project_number_in_namespace( + self, + namespace: str, + admin: Union[str, List[str]] = None, + ) -> int: + """ + Get project by providing search string. + :param namespace: namespace where to search for a project + :param admin: True, if user is admin of the namespace [Default: False] + :return Integer: number of projects in the namepsace + """ + if admin is None: + admin = [] + statement = select(func.count() + ).select_from(Projects + ).where(Projects.namespace == namespace) + statement = statement.where( + or_(Projects.private.is_(False), Projects.namespace.in_(admin)) + ) + + with Session(self._sa_engine) as session: + result = session.execute(statement).first() + + try: + return result[0] + except IndexError: + return 0 diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 2b09f29..537279d 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -1,15 +1,15 @@ from typing import Union, List import logging -from pepdbagent.base_connection import BaseConnection +from sqlalchemy.orm import Session +from sqlalchemy import Engine +from sqlalchemy import insert, select, delete, update, func, distinct +from sqlalchemy import and_, or_ + +from pepdbagent.db_utils import Projects from pepdbagent.const import ( DEFAULT_LIMIT, DEFAULT_OFFSET, - NAMESPACE_COL, - NAME_COL, - N_SAMPLES_COL, - DB_TABLE_NAME, - PRIVATE_COL, ) from pepdbagent.models import Namespace, NamespaceList @@ -25,11 +25,11 @@ class PEPDatabaseNamespace: While using this class, user can retrieve all necessary metadata about PEPs """ - def __init__(self, con: BaseConnection): + def __init__(self, engine: Engine): """ - :param con: Connection to db represented by BaseConnection class object + :param engine: Connection to db represented by sqlalchemy engine """ - self.con = con + self._sa_engine = engine def get( self, @@ -85,31 +85,32 @@ def _get_namespace( number_of_samples, } """ - if search_str: - search_str = f"%%{search_str}%%" - search_sql_values = (search_str,) - search_sql = f"""{NAMESPACE_COL} ILIKE %s and""" - else: - search_sql_values = tuple() - search_sql = "" + statement = select(Projects.namespace, + func.count(Projects.name).label("number_of_projects"), + func.sum(Projects.number_of_samples).label("number_of_samples"), + ).group_by(Projects.namespace).select_from(Projects) - count_sql = f""" - select {NAMESPACE_COL}, COUNT({NAME_COL}), SUM({N_SAMPLES_COL}) - from {DB_TABLE_NAME} where {search_sql} - ({PRIVATE_COL} is %s or {NAMESPACE_COL} in %s) - GROUP BY {NAMESPACE_COL} - LIMIT %s OFFSET %s; - """ - results = self.con.run_sql_fetchall( - count_sql, *search_sql_values, False, admin_nsp, limit, offset + if search_str: + sql_search_str = f"%{search_str}%" + statement = statement.where( + or_( + Projects.namespace.ilike(sql_search_str), + ) + ) + statement = statement.where( + or_(Projects.private.is_(False), Projects.namespace.in_(admin_nsp)) ) + statement = statement.limit(limit).offset(offset) + with Session(self._sa_engine) as session: + query_results = session.execute(statement).all() + results_list = [] - for res in results: + for res in query_results: results_list.append( Namespace( - namespace=res[0], - number_of_projects=res[1], - number_of_samples=res[2], + namespace=res.namespace, + number_of_projects=res.number_of_projects, + number_of_samples=res.number_of_samples, ) ) return results_list @@ -122,23 +123,18 @@ def _count_namespace(self, search_str: str = None, admin_nsp: tuple = None) -> i :param admin_nsp: tuple of namespaces where project can be retrieved if they are privet :return: number of found namespaces """ + statement = select(func.count(distinct(Projects.namespace)).label("number_of_namespaces")).select_from(Projects) if search_str: - search_str = f"%%{search_str}%%" - search_sql_values = (search_str,) - search_sql = f"""{NAMESPACE_COL} ILIKE %s and""" - else: - search_sql_values = tuple() - search_sql = "" - count_sql = f""" - select COUNT(DISTINCT ({NAMESPACE_COL})) - from {DB_TABLE_NAME} where {search_sql} - ({PRIVATE_COL} is %s or {NAMESPACE_COL} in %s) - """ - result = self.con.run_sql_fetchall( - count_sql, *search_sql_values, False, admin_nsp + sql_search_str = f"%{search_str}%" + statement = statement.where( + or_( + Projects.namespace.ilike(sql_search_str), + ) + ) + statement = statement.where( + or_(Projects.private.is_(False), Projects.namespace.in_(admin_nsp)) ) - try: - number_of_prj = result[0][0] - except KeyError: - number_of_prj = 0 - return number_of_prj + with Session(self._sa_engine) as session: + query_results = session.execute(statement).first() + + return query_results.number_of_namespaces diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index ad5bf70..99a57c4 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -42,7 +42,7 @@ def get( self, namespace: str, name: str, - tag: str = None, + tag: str = DEFAULT_TAG, raw: bool = False, ) -> Union[peppy.Project, dict, None]: """ @@ -61,9 +61,6 @@ def get( _subsample_dict: dict } """ - if tag is None: - tag = DEFAULT_TAG - with Session(self._sa_engine) as session: found_prj = session.execute( select( @@ -159,6 +156,7 @@ def delete_by_rp( ) -> None: """ Delete record from database by using registry_path + :param registry_path: Registry path of the project ('namespace/name:tag') :return: None """ @@ -179,6 +177,7 @@ def create( Upload project to the database. Project with the key, that already exists won't be uploaded(but case, when argument update is set True) + :param peppy.Project project: Project object that has to be uploaded to the DB :param namespace: namespace of the project (Default: 'other') :param name: name of the project (Default: name is taken from the project object) @@ -193,6 +192,7 @@ def create( if name: proj_name = name + proj_dict["name"] = name elif proj_dict["name"]: proj_name = proj_dict["name"] else: @@ -200,12 +200,8 @@ def create( f"Name of the project wasn't provided. Project will not be uploaded." ) - proj_dict["name"] = name - proj_digest = create_digest(proj_dict) - number_of_samples = len(project.samples) - proj_dict = json.dumps(proj_dict) if update_only: _LOGGER.info( @@ -225,8 +221,8 @@ def create( try: _LOGGER.info(f"Uploading {namespace}/{proj_name}:{tag} project...") - with self._sa_engine.begin() as engine: - engine.execute( + with Session(self._sa_engine) as session: + session.execute( insert(Projects).values( namespace=namespace, name=proj_name, @@ -240,6 +236,8 @@ def create( ) ) + return None + except IntegrityError: if overwrite: self._overwrite( @@ -272,6 +270,7 @@ def _overwrite( ) -> None: """ Update existing project by providing all necessary information. + :param project_dict: project dictionary in json format :param namespace: project namespace :param proj_name: project name @@ -324,6 +323,7 @@ def update( ) -> None: """ Update partial parts of the record in db + :param update_dict: dict with update key->values. Dict structure: { project: Optional[peppy.Project] @@ -368,8 +368,12 @@ def update( @staticmethod def __create_update_dict(update_values: UpdateItems) -> dict: """ + Modify keys and values that set for update and create unified + dictionary of the values that have to be updated - :return: + :param update_values: UpdateItems (pydantic class) with + updating values + :return: unified update dict """ update_final = UpdateModel() diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index a3f5a46..e7eea17 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -25,11 +25,12 @@ def __init__( :param database: the name of the database that you want to connect. :param user: the username used to authenticate. :param password: password used to authenticate. + :param drivername: driver of the database [Default: postgresql] :param dsn: libpq connection string using the dsn parameter (e.g. "localhost://username:password@pdp_db:5432") """ - session = BaseEngine( + sa_engine = BaseEngine( host=host, port=port, database=database, @@ -40,11 +41,11 @@ def __init__( echo=echo, ).engine - self.__con = session + self.__sa_engine = sa_engine - self.__project = PEPDatabaseProject(session) - # self.__annotation = PEPDatabaseAnnotation(con) - # self.__namespace = PEPDatabaseNamespace(con) + self.__project = PEPDatabaseProject(sa_engine) + self.__annotation = PEPDatabaseAnnotation(sa_engine) + self.__namespace = PEPDatabaseNamespace(sa_engine) self.__db_name = database @@ -52,23 +53,20 @@ def __init__( def project(self): return self.__project - # @property - # def annotation(self): - # return self.__annotation - # - # @property - # def namespace(self): - # return self.__namespace + @property + def annotation(self): + return self.__annotation + + @property + def namespace(self): + return self.__namespace def __str__(self): return f"Connection to the database: '{self.__db_name}' is set!" - def __del__(self): - self.__con.__del__() - def __exit__(self): - self.__con.__exit__() + self.__sa_engine.__exit__() @property def connection(self): - return self.__con + return self.__sa_engine From b233d4ed93ed16b5cc3f26dd8c1ec6d741164875 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 25 May 2023 13:46:38 -0400 Subject: [PATCH 08/32] cleaning --- pepdbagent/base_connection.py | 142 ------------------------------- pepdbagent/schema_initializer.py | 67 --------------- 2 files changed, 209 deletions(-) delete mode 100644 pepdbagent/base_connection.py delete mode 100644 pepdbagent/schema_initializer.py diff --git a/pepdbagent/base_connection.py b/pepdbagent/base_connection.py deleted file mode 100644 index ec664c3..0000000 --- a/pepdbagent/base_connection.py +++ /dev/null @@ -1,142 +0,0 @@ -from urllib.parse import urlparse -import logging -import psycopg2 -from .const import * -from .exceptions import SchemaError - -_LOGGER = logging.getLogger("pepdbagent") - - -class BaseConnection: - """ - A class with base methods, that are used in several classes. e.g. fetch_one or fetch_all - """ - - def __init__( - self, - host="localhost", - port=5432, - database="pep-db", - user=None, - password=None, - dsn=None, - ): - """ - Initialize connection to the pep_db database. You can use The basic connection parameters - or libpq connection string. - :param host: database server address e.g., localhost or an IP address. - :param port: the port number that defaults to 5432 if it is not provided. - :param database: the name of the database that you want to connect. - :param user: the username used to authenticate. - :param password: password used to authenticate. - :param dsn: libpq connection string using the dsn parameter - (e.g. "localhost://username:password@pdp_db:5432") - """ - - _LOGGER.info(f"Initializing connection to {database}...") - - if dsn is not None: - self.pg_connection = psycopg2.connect(dsn) - self.db_name = self._extract_db_name(dsn) - else: - self.pg_connection = psycopg2.connect( - host=host, - port=port, - database=database, - user=user, - password=password, - ) - self.db_name = database - - # Ensure data is added to the database immediately after write commands - self.pg_connection.autocommit = True - - self._check_conn_db() - _LOGGER.info(f"Connected successfully!") - - def commit_to_database(self) -> None: - """ - Commit to database - """ - self.pg_connection.commit() - - def close_connection(self) -> None: - """ - Close connection with database - """ - self.pg_connection.close() - - def __exit__(self): - self.close_connection() - - def __del__(self): - self.close_connection() - - def run_sql_fetchone(self, sql_query: str, *argv) -> list: - """ - Fetching one result by providing sql query and arguments - :param sql_query: sql string that has to run - :param argv: arguments that has to be added to sql query - :return: set of query result - """ - cursor = self.pg_connection.cursor() - _LOGGER.debug(f"Running fetch_one function with sql: {sql_query}") - try: - cursor.execute(sql_query, argv) - output_result = cursor.fetchone() - - # must run check here since None is not iterable. - if output_result is not None: - return list(output_result) - else: - return [] - except psycopg2.Error as e: - _LOGGER.error(f"Error occurred while running query: {e}") - finally: - cursor.close() - - def run_sql_fetchall(self, sql_query: str, *argv) -> list: - """ - Fetching all result by providing sql query and arguments - :param str sql_query: sql string that has to run - :param argv: arguments that has to be added to sql query - :return: set of query result - """ - _LOGGER.debug(f"Running fetch_all function with sql: {sql_query}") - cursor = self.pg_connection.cursor() - try: - cursor.execute(sql_query, (*argv,)) - output_result = cursor.fetchall() - cursor.close() - return output_result - except psycopg2.Error as e: - _LOGGER.error(f"Error occurred while running query: {e}") - finally: - cursor.close() - - def _check_conn_db(self) -> None: - """ - Checking if connected database has correct column_names - """ - a = f""" - SELECT * - FROM INFORMATION_SCHEMA.COLUMNS - WHERE TABLE_NAME = N'{DB_TABLE_NAME}' - """ - result = self.run_sql_fetchall(a) - cols_name = [] - for col in result: - cols_name.append(col[3]) - DB_COLUMNS.sort() - cols_name.sort() - if DB_COLUMNS != cols_name: - raise SchemaError - - @staticmethod - def _extract_db_name(dsn: str) -> str: - """ - Extract database name from libpq conncection string - :param dsn: libpq connection string using the dsn parameter - :return: database name - """ - return urlparse(dsn).path[1:] diff --git a/pepdbagent/schema_initializer.py b/pepdbagent/schema_initializer.py deleted file mode 100644 index c3fb9bf..0000000 --- a/pepdbagent/schema_initializer.py +++ /dev/null @@ -1,67 +0,0 @@ -from sqlalchemy.orm import Mapped -from sqlalchemy.orm import mapped_column -from sqlalchemy import String, BigInteger -from sqlalchemy.ext.compiler import compiles -import datetime -from sqlalchemy.dialects.postgresql import JSONB -from typing import Optional, Any -from sqlalchemy import create_engine -from sqlalchemy import event - -from sqlalchemy.orm import DeclarativeBase, MappedAsDataclass -from sqlalchemy import PrimaryKeyConstraint, FetchedValue - -class BIGSERIAL(BigInteger): - pass - - -@compiles(BIGSERIAL, "postgresql") -def compile_bigserial_pg(type_, compiler, **kw): - return "BIGSERIAL" - - -class Base(MappedAsDataclass, DeclarativeBase): - type_annotation_map = { - dict[str, Any]: JSONB, - } - - -@event.listens_for(Base.metadata, 'after_create') -def receive_after_create(target, connection, tables, **kw): - "listen for the 'after_create' event" - if tables: - print('A table was created') - else: - print('A table was not created') - - -class Projects(Base): - __tablename__ = "projects" - - id: Mapped[int] = mapped_column(BIGSERIAL, server_default=FetchedValue()) - namespace: Mapped[str] = mapped_column(primary_key=True) - name: Mapped[str] = mapped_column(primary_key=True) - tag: Mapped[str] = mapped_column(primary_key=True) - digest: Mapped[str] = mapped_column(String(32)) - project_value: Mapped[dict[str, Any]] - private: Mapped[bool] - number_of_samples: Mapped[int] - submission_date: Mapped[datetime.datetime] - last_update_date: Mapped[datetime.datetime] - # schema: Mapped[Optional[str]] - - __table_args__ = ( - PrimaryKeyConstraint("namespace", "name", "tag", name="id"), - ) - - -def main(): - engine = create_engine('postgresql://postgres:docker@localhost:5432/pep-db', echo=True, future=True) - asd = Base.metadata.create_all(engine) - - - - -if __name__ == "__main__": - main() - From 3136417672207e18820b3313acddc5f28e6edece Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 25 May 2023 15:51:33 -0400 Subject: [PATCH 09/32] cleaning 2 --- pep_db/Dockerfile | 2 +- pepdbagent/__init__.py | 7 ++-- pepdbagent/db_utils.py | 56 ++++++++++++------------------- pepdbagent/models.py | 2 +- pepdbagent/modules/annotation.py | 52 ++++++++++++++-------------- pepdbagent/modules/namespace.py | 29 ++++++++-------- pepdbagent/modules/project.py | 20 ++++------- pepdbagent/pepdbagent.py | 4 +-- pepdbagent/utils.py | 6 ++-- requirements/requirements-all.txt | 2 +- setup.py | 3 +- tests/test_pepagent.py | 12 ++++--- 12 files changed, 89 insertions(+), 106 deletions(-) diff --git a/pep_db/Dockerfile b/pep_db/Dockerfile index 4343fbb..5d04731 100644 --- a/pep_db/Dockerfile +++ b/pep_db/Dockerfile @@ -2,4 +2,4 @@ FROM postgres ENV POSTGRES_USER postgres ENV POSTGRES_PASSWORD docker ENV POSTGRES_DB pep-db -COPY pep_db.sql /docker-entrypoint-initdb.d/ \ No newline at end of file +#COPY pep_db.sql /docker-entrypoint-initdb.d/ \ No newline at end of file diff --git a/pepdbagent/__init__.py b/pepdbagent/__init__.py index 09f57b0..4098c3b 100644 --- a/pepdbagent/__init__.py +++ b/pepdbagent/__init__.py @@ -1,10 +1,9 @@ """ Package-level data """ -from .pepdbagent import * -from ._version import __version__ - -import logmuse import coloredlogs +import logmuse +from ._version import __version__ +from .pepdbagent import * _LOGGER = logmuse.init_logger("pepdbagent") coloredlogs.install( diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 6a75746..e674d6b 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -1,22 +1,27 @@ -from sqlalchemy.engine import create_engine -from sqlalchemy.orm import Mapped, mapped_column -from sqlalchemy.orm import DeclarativeBase, MappedAsDataclass -from sqlalchemy.exc import ProgrammingError -from sqlalchemy.orm import Session -from sqlalchemy import PrimaryKeyConstraint, FetchedValue -from sqlalchemy import Select, Result -from sqlalchemy import select - -from sqlalchemy import String, BigInteger -from sqlalchemy.dialects.postgresql import JSONB - -from sqlalchemy import event -from sqlalchemy.ext.compiler import compiles -from sqlalchemy.engine import URL - -from typing import Optional, Any import datetime import logging +from typing import Any, Optional + +from sqlalchemy import ( + BigInteger, + FetchedValue, + PrimaryKeyConstraint, + Result, + Select, + String, + event, + select, +) +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.engine import URL, create_engine +from sqlalchemy.exc import ProgrammingError +from sqlalchemy.ext.compiler import compiles +from sqlalchemy.orm import ( + DeclarativeBase, + Mapped, + Session, + mapped_column, +) from pepdbagent.const import POSTGRES_DIALECT from pepdbagent.exceptions import SchemaError @@ -186,20 +191,3 @@ def _create_dsn_string( :return: sqlalchemy connection string """ return f"{dialect}://{user}:{password}@{host}:{port}/{database}" - - -def main(): - engine = BaseEngine( - host="localhost", - port=5432, - database="pep-db", - user="postgres", - password="docker", - echo=True, - ) - engine.create_schema() - ff = engine.session - - -if __name__ == "__main__": - main() diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 0f2cdf3..4edde94 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -2,8 +2,8 @@ import datetime from typing import List, Optional -from pydantic import BaseModel, Field, validator, Extra import peppy +from pydantic import BaseModel, Extra, Field, validator class AnnotationModel(BaseModel): diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index 0790329..e0a27fe 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -1,21 +1,16 @@ -from typing import Union, List import logging +from typing import List, Union +from sqlalchemy import Engine, func, select +from sqlalchemy import and_, or_ from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session -from sqlalchemy import Engine -from sqlalchemy import insert, select, delete, update, func -from sqlalchemy import and_, or_ +from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET, DEFAULT_TAG from pepdbagent.db_utils import Projects -from pepdbagent.const import ( - DEFAULT_LIMIT, - DEFAULT_OFFSET, - DEFAULT_TAG, -) -from pepdbagent.utils import tuple_converter, registry_path_converter -from pepdbagent.models import AnnotationModel, AnnotationList -from pepdbagent.exceptions import RegistryPathError, ProjectNotFoundError +from pepdbagent.exceptions import ProjectNotFoundError, RegistryPathError +from pepdbagent.models import AnnotationList, AnnotationModel +from pepdbagent.utils import registry_path_converter, tuple_converter _LOGGER = logging.getLogger("pepdbagent") @@ -221,15 +216,16 @@ def _count_projects( Projects.name.ilike(sql_search_str), ) - if self.get_project_number_in_namespace(namespace=namespace, admin=admin) < 1000: + if ( + self.get_project_number_in_namespace(namespace=namespace, admin=admin) + < 1000 + ): search_query = or_( search_query, Projects.project_value["description"].astext.ilike(sql_search_str), ) - statement = statement.where( - search_query - ) + statement = statement.where(search_query) if namespace: statement = statement.where(Projects.namespace == namespace) statement = statement.where( @@ -285,16 +281,16 @@ def _get_projects( Projects.name.ilike(sql_search_str), ) - if self.get_project_number_in_namespace(namespace=namespace, admin=admin) < 1000: - + if ( + self.get_project_number_in_namespace(namespace=namespace, admin=admin) + < 1000 + ): search_query = or_( search_query, Projects.project_value["description"].astext.ilike(sql_search_str), ) - statement = statement.where( - search_query - ) + statement = statement.where(search_query) if namespace: statement = statement.where(Projects.namespace == namespace) @@ -324,9 +320,9 @@ def _get_projects( return results_list def get_project_number_in_namespace( - self, - namespace: str, - admin: Union[str, List[str]] = None, + self, + namespace: str, + admin: Union[str, List[str]] = None, ) -> int: """ Get project by providing search string. @@ -336,9 +332,11 @@ def get_project_number_in_namespace( """ if admin is None: admin = [] - statement = select(func.count() - ).select_from(Projects - ).where(Projects.namespace == namespace) + statement = ( + select(func.count()) + .select_from(Projects) + .where(Projects.namespace == namespace) + ) statement = statement.where( or_(Projects.private.is_(False), Projects.namespace.in_(admin)) ) diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 537279d..10ee891 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -1,17 +1,11 @@ -from typing import Union, List import logging +from typing import List, Union +from sqlalchemy import Engine, and_, delete, distinct, func, insert, or_, select, update from sqlalchemy.orm import Session -from sqlalchemy import Engine -from sqlalchemy import insert, select, delete, update, func, distinct -from sqlalchemy import and_, or_ +from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET from pepdbagent.db_utils import Projects -from pepdbagent.const import ( - DEFAULT_LIMIT, - DEFAULT_OFFSET, -) - from pepdbagent.models import Namespace, NamespaceList from pepdbagent.utils import tuple_converter @@ -85,10 +79,15 @@ def _get_namespace( number_of_samples, } """ - statement = select(Projects.namespace, - func.count(Projects.name).label("number_of_projects"), - func.sum(Projects.number_of_samples).label("number_of_samples"), - ).group_by(Projects.namespace).select_from(Projects) + statement = ( + select( + Projects.namespace, + func.count(Projects.name).label("number_of_projects"), + func.sum(Projects.number_of_samples).label("number_of_samples"), + ) + .group_by(Projects.namespace) + .select_from(Projects) + ) if search_str: sql_search_str = f"%{search_str}%" @@ -123,7 +122,9 @@ def _count_namespace(self, search_str: str = None, admin_nsp: tuple = None) -> i :param admin_nsp: tuple of namespaces where project can be retrieved if they are privet :return: number of found namespaces """ - statement = select(func.count(distinct(Projects.namespace)).label("number_of_namespaces")).select_from(Projects) + statement = select( + func.count(distinct(Projects.namespace)).label("number_of_namespaces") + ).select_from(Projects) if search_str: sql_search_str = f"%{search_str}%" statement = statement.where( diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 99a57c4..7510b34 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -1,26 +1,18 @@ import datetime import json -from typing import Union, Tuple import logging +from typing import Tuple, Union + import peppy +from sqlalchemy import Engine, and_, delete, insert, or_, select, update from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session -from sqlalchemy import Engine -from sqlalchemy import insert, select, delete, update -from sqlalchemy import and_, or_ -from pepdbagent.db_utils import Projects -from pepdbagent.models import ( - UpdateModel, - UpdateItems, -) -from pepdbagent.base_connection import BaseConnection from pepdbagent.const import * +from pepdbagent.db_utils import Projects +from pepdbagent.exceptions import ProjectNotFoundError, ProjectUniqueNameError +from pepdbagent.models import UpdateItems, UpdateModel from pepdbagent.utils import create_digest, registry_path_converter -from pepdbagent.exceptions import ( - ProjectNotFoundError, - ProjectUniqueNameError, -) _LOGGER = logging.getLogger("pepdbagent") diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index e7eea17..85a5188 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -1,8 +1,8 @@ +from pepdbagent.const import POSTGRES_DIALECT from pepdbagent.db_utils import BaseEngine from pepdbagent.modules.annotation import PEPDatabaseAnnotation -from pepdbagent.modules.project import PEPDatabaseProject from pepdbagent.modules.namespace import PEPDatabaseNamespace -from pepdbagent.const import POSTGRES_DIALECT +from pepdbagent.modules.project import PEPDatabaseProject class PEPDatabaseAgent(object): diff --git a/pepdbagent/utils.py b/pepdbagent/utils.py index 763196d..36eaa41 100644 --- a/pepdbagent/utils.py +++ b/pepdbagent/utils.py @@ -1,11 +1,13 @@ -from collections.abc import Iterable import json +from collections.abc import Iterable from hashlib import md5 from typing import Tuple, Union + import ubiquerg -from .exceptions import RegistryPathError from peppy.const import SAMPLE_RAW_DICT_KEY +from .exceptions import RegistryPathError + def is_valid_registry_path(rpath: str) -> bool: """ diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index b418b41..e93990f 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,4 +1,4 @@ -psycopg2-binary +sqlalchemy>=2.0.0 logmuse peppy>=0.35.4 ubiquerg>=0.6.2 diff --git a/setup.py b/setup.py index 98ad67e..2a124b7 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ -import sys import os +import sys + from setuptools import find_packages, setup PACKAGE_NAME = "pepdbagent" diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index d228577..04bad40 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -1,10 +1,12 @@ -from psycopg2.errors import UniqueViolation -from pepdbagent.pepdbagent import PEPDatabaseAgent -from pepdbagent.models import BaseModel +import datetime import json -import psycopg2 + +import sqlalchemy import pytest -import datetime +from psycopg2.errors import UniqueViolation + +from pepdbagent.models import BaseModel +from pepdbagent.pepdbagent import PEPDatabaseAgent class TestBaseConnection: From 87afcf6d872116d0f9d0b8ce2606f79691f34c02 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 25 May 2023 16:33:53 -0400 Subject: [PATCH 10/32] fixed uploading project --- pepdbagent/modules/project.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 7510b34..8ac5c0a 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -213,8 +213,8 @@ def create( try: _LOGGER.info(f"Uploading {namespace}/{proj_name}:{tag} project...") - with Session(self._sa_engine) as session: - session.execute( + with self._sa_engine.begin() as eng: + eng.execute( insert(Projects).values( namespace=namespace, name=proj_name, From 10bc70940cb02b1433f74b7d0d6d2ab882cb78c4 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 25 May 2023 17:05:19 -0400 Subject: [PATCH 11/32] fixed annotation search --- pepdbagent/db_utils.py | 1 + pepdbagent/modules/annotation.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index e674d6b..89b23ee 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -74,6 +74,7 @@ class Projects(Base): submission_date: Mapped[datetime.datetime] last_update_date: Mapped[datetime.datetime] # schema: Mapped[Optional[str]] + # fork: Mapped[id] = mapped_column(BIGSERIAL) __table_args__ = (PrimaryKeyConstraint("namespace", "name", "tag", name="id"),) diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index e0a27fe..5e72d26 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -213,7 +213,7 @@ def _count_projects( sql_search_str = f"%{search_str}%" search_query = or_( Projects.name.ilike(sql_search_str), - Projects.name.ilike(sql_search_str), + Projects.tag.ilike(sql_search_str), ) if ( @@ -278,7 +278,7 @@ def _get_projects( sql_search_str = f"%{search_str}%" search_query = or_( Projects.name.ilike(sql_search_str), - Projects.name.ilike(sql_search_str), + Projects.tag.ilike(sql_search_str), ) if ( From 51ba7f9e8880f4e0ceb176db8eb6b8bf61040210 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 2 Jun 2023 10:58:06 -0400 Subject: [PATCH 12/32] fixed project models --- pepdbagent/db_utils.py | 1 - pepdbagent/models.py | 13 ++++++++++--- pepdbagent/modules/project.py | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 89b23ee..e674d6b 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -74,7 +74,6 @@ class Projects(Base): submission_date: Mapped[datetime.datetime] last_update_date: Mapped[datetime.datetime] # schema: Mapped[Optional[str]] - # fork: Mapped[id] = mapped_column(BIGSERIAL) __table_args__ = (PrimaryKeyConstraint("namespace", "name", "tag", name="id"),) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 4edde94..804f842 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -75,7 +75,8 @@ class UpdateItems(BaseModel): is_private: Optional[bool] name: Optional[str] - # class Config: + class Config: + arbitrary_types_allowed = True # extra = Extra.forbid @@ -86,12 +87,18 @@ class UpdateModel(BaseModel): """ project_value: Optional[dict] - name: Optional[str] - tag: Optional[str] + name: Optional[str] = None + tag: Optional[str] = None private: Optional[bool] = Field(alias="is_private") digest: Optional[str] last_update_date: Optional[datetime.datetime] number_of_samples: Optional[int] + @validator('tag', 'name') + def tag_must_not_be_empty(cls, v): + if '' == v: + return None + return v + class Config: extra = Extra.forbid diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 8ac5c0a..9752c20 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -395,7 +395,7 @@ def __create_update_dict(update_values: UpdateItems) -> dict: update_final = UpdateModel( name=update_values.name, **update_final.dict(exclude_unset=True) ) - return update_final.dict(exclude_unset=True) + return update_final.dict(exclude_unset=True, exclude_none=True) def exists( self, From 2e108455f777795b17e33a935ab053c44d857401 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 2 Jun 2023 13:27:33 -0400 Subject: [PATCH 13/32] Fixed #68 --- pepdbagent/models.py | 5 +- pepdbagent/modules/annotation.py | 132 +++++++++++++++++++++---------- 2 files changed, 92 insertions(+), 45 deletions(-) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 804f842..9f41a34 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -77,6 +77,7 @@ class UpdateItems(BaseModel): class Config: arbitrary_types_allowed = True + # extra = Extra.forbid @@ -94,9 +95,9 @@ class UpdateModel(BaseModel): last_update_date: Optional[datetime.datetime] number_of_samples: Optional[int] - @validator('tag', 'name') + @validator("tag", "name") def tag_must_not_be_empty(cls, v): - if '' == v: + if "" == v: return None return v diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index 5e72d26..54da130 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -11,6 +11,7 @@ from pepdbagent.exceptions import ProjectNotFoundError, RegistryPathError from pepdbagent.models import AnnotationList, AnnotationModel from pepdbagent.utils import registry_path_converter, tuple_converter +from sqlalchemy.sql.selectable import Select _LOGGER = logging.getLogger("pepdbagent") @@ -37,9 +38,12 @@ def get( admin: Union[List[str], str] = None, limit: int = DEFAULT_LIMIT, offset: int = DEFAULT_OFFSET, + order_by: str = "update_date", + order_desc: bool = False, ) -> AnnotationList: """ Get project annotations. + There is 5 scenarios how to get project or projects annotations: - provide name, namespace and tag. Return: project annotations of exact provided PK(namespace, name, tag) - provide only namespace. Return: list of projects annotations in specified namespace @@ -54,6 +58,10 @@ def get( :param admin: admin name (namespace), or list of namespaces, where user is admin :param limit: return limit :param offset: return offset + :param order_by: sort the result-set by the information + Options: ["name", "update_date", "submission_date"] + [Default: update_date] + :param order_desc: Sort the records in descending order. [Default: False] :return: pydantic model: AnnotationReturnModel """ if all([namespace, name, tag]): @@ -83,6 +91,8 @@ def get( admin=admin, offset=offset, limit=limit, + order_by=order_by, + order_desc=order_desc, ), ) @@ -209,27 +219,11 @@ def _count_projects( if admin is None: admin = [] statement = select(func.count()).select_from(Projects) - if search_str: - sql_search_str = f"%{search_str}%" - search_query = or_( - Projects.name.ilike(sql_search_str), - Projects.tag.ilike(sql_search_str), - ) - - if ( - self.get_project_number_in_namespace(namespace=namespace, admin=admin) - < 1000 - ): - search_query = or_( - search_query, - Projects.project_value["description"].astext.ilike(sql_search_str), - ) - - statement = statement.where(search_query) - if namespace: - statement = statement.where(Projects.namespace == namespace) - statement = statement.where( - or_(Projects.private.is_(False), Projects.namespace.in_(admin)) + statement = self._add_where_clause( + statement, + namespace=namespace, + search_str=search_str, + admin_list=admin, ) with Session(self._sa_engine) as session: @@ -247,14 +241,20 @@ def _get_projects( admin: Union[str, List[str]] = None, limit: int = DEFAULT_LIMIT, offset: int = DEFAULT_OFFSET, + order_by: str = "update_date", + order_desc: bool = False, ) -> List[AnnotationModel]: """ Get project by providing search string. :param namespace: namespace where to search for a project - :param search_str: search string that has to be found in the name, tag or project description + :param search_str: search string that has to be found in the name or tag :param admin: True, if user is admin of the namespace [Default: False] :param limit: limit of return results :param offset: number of results off set (that were already showed) + :param order_by: sort the result-set by the information + Options: ["name", "update_date", "submission_date"] + [Default: "update_date"] + :param order_desc: Sort the records in descending order. [Default: False] :return: list of found projects with their annotations. """ _LOGGER.info( @@ -274,29 +274,11 @@ def _get_projects( Projects.last_update_date, Projects.digest, ).select_from(Projects) - if search_str: - sql_search_str = f"%{search_str}%" - search_query = or_( - Projects.name.ilike(sql_search_str), - Projects.tag.ilike(sql_search_str), - ) - - if ( - self.get_project_number_in_namespace(namespace=namespace, admin=admin) - < 1000 - ): - search_query = or_( - search_query, - Projects.project_value["description"].astext.ilike(sql_search_str), - ) - statement = statement.where(search_query) - if namespace: - statement = statement.where(Projects.namespace == namespace) - - statement = statement.where( - or_(Projects.private.is_(False), Projects.namespace.in_(admin)) + statement = self._add_where_clause( + statement, namespace=namespace, search_str=search_str, admin_list=admin ) + statement = self._add_order_by_keyword(statement, by=order_by, desc=order_desc) with Session(self._sa_engine) as session: query_results = session.execute(statement.limit(limit).offset(offset)).all() @@ -319,6 +301,69 @@ def _get_projects( return results_list + @staticmethod + def _add_order_by_keyword( + statement: Select, by: str = "update_date", desc: bool = False + ) -> Select: + """ + Add order by clause to sqlalchemy statement + + :param statement: sqlalchemy representation of a SELECT statement. + :param by: sort the result-set by the information + Options: ["name", "update_date", "submission_date"] + [Default: "update_date"] + :param desc: Sort the records in descending order. [Default: False] + :return: sqlalchemy representation of a SELECT statement with order by keyword + """ + if by == "update_date": + order_by_obj = Projects.last_update_date + elif by == "name": + order_by_obj = Projects.name + elif by == "submission_date": + order_by_obj = Projects.submission_date + else: + _LOGGER.warning( + f"order by: '{by}' statement is unavailable. Projects are sorted by 'update_date'" + ) + order_by_obj = Projects.last_update_date + + if desc: + order_by_obj = order_by_obj.desc() + + return statement.order_by(order_by_obj) + + @staticmethod + def _add_where_clause( + statement: Select, + namespace: str = None, + search_str: str = None, + admin_list: Union[str, List[str]] = None, + ) -> Select: + """ + Add where clause to sqlalchemy statement + + :param statement: sqlalchemy representation of a SELECT statement. + :param namespace: project namespace sql:(where namespace = "") + :param search_str: search string that has to be found in the name or tag + :param admin_list: list or string of admin rights to namespace + :return: sqlalchemy representation of a SELECT statement with where clause. + """ + if search_str: + sql_search_str = f"%{search_str}%" + search_query = or_( + Projects.name.ilike(sql_search_str), + Projects.tag.ilike(sql_search_str), + ) + statement = statement.where(search_query) + if namespace: + statement = statement.where(Projects.namespace == namespace) + + statement = statement.where( + or_(Projects.private.is_(False), Projects.namespace.in_(admin_list)) + ) + + return statement + def get_project_number_in_namespace( self, namespace: str, @@ -326,6 +371,7 @@ def get_project_number_in_namespace( ) -> int: """ Get project by providing search string. + :param namespace: namespace where to search for a project :param admin: True, if user is admin of the namespace [Default: False] :return Integer: number of projects in the namepsace From 4442c5e0d476743a853a5a5c311efcdb6ad59f9c Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 2 Jun 2023 14:39:58 -0400 Subject: [PATCH 14/32] Fixed #81 --- pepdbagent/models.py | 8 +++++++- pepdbagent/modules/annotation.py | 8 ++++++-- pepdbagent/modules/project.py | 10 +++++++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 9f41a34..2ac1013 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -96,10 +96,16 @@ class UpdateModel(BaseModel): number_of_samples: Optional[int] @validator("tag", "name") - def tag_must_not_be_empty(cls, v): + def value_must_not_be_empty(cls, v): if "" == v: return None return v + @validator("tag", "name") + def value_must_be_lowercase(cls, v): + if v: + return v.lower() + return v + class Config: extra = Extra.forbid diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index 54da130..ecb6051 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -327,9 +327,13 @@ def _add_order_by_keyword( ) order_by_obj = Projects.last_update_date - if desc: + if desc and by == "name": order_by_obj = order_by_obj.desc() + else: + if not desc: + order_by_obj = order_by_obj.desc() + return statement.order_by(order_by_obj) @staticmethod @@ -340,7 +344,7 @@ def _add_where_clause( admin_list: Union[str, List[str]] = None, ) -> Select: """ - Add where clause to sqlalchemy statement + Add where clause to sqlalchemy statement (in project search) :param statement: sqlalchemy representation of a SELECT statement. :param namespace: project namespace sql:(where namespace = "") diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 9752c20..9a7ca0e 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -53,6 +53,8 @@ def get( _subsample_dict: dict } """ + name = name.lower() + namespace = namespace.lower() with Session(self._sa_engine) as session: found_prj = session.execute( select( @@ -124,6 +126,8 @@ def delete( :param tag: Tag :return: None """ + name = name.lower() + namespace = namespace.lower() with self._sa_engine as engine: engine.execute( delete(Projects).where( @@ -182,11 +186,13 @@ def create( """ proj_dict = project.to_dict(extended=True) + namespace = namespace.lower() if name: + name = name.lower() proj_name = name proj_dict["name"] = name elif proj_dict["name"]: - proj_name = proj_dict["name"] + proj_name = proj_dict["name"].lower() else: raise ValueError( f"Name of the project wasn't provided. Project will not be uploaded." @@ -272,6 +278,8 @@ def _overwrite( :param private: boolean value if the project should be visible just for user that creates it. :return: None """ + proj_name = proj_name.lower() + namespace = namespace.lower() if self.exists(namespace=namespace, name=proj_name, tag=tag): _LOGGER.info(f"Updating {proj_name} project...") with self._sa_engine.begin() as engine: From fd359ef30b56061def8e5417767916ae8b1ee147 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 2 Jun 2023 16:34:33 -0400 Subject: [PATCH 15/32] small refactoring --- pepdbagent/modules/annotation.py | 21 +++++++-------- pepdbagent/modules/namespace.py | 46 ++++++++++++++++++++++---------- pepdbagent/modules/project.py | 4 +-- 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index ecb6051..f0b0248 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -5,13 +5,14 @@ from sqlalchemy import and_, or_ from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session +from sqlalchemy.sql.selectable import Select from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET, DEFAULT_TAG from pepdbagent.db_utils import Projects from pepdbagent.exceptions import ProjectNotFoundError, RegistryPathError from pepdbagent.models import AnnotationList, AnnotationModel from pepdbagent.utils import registry_path_converter, tuple_converter -from sqlalchemy.sql.selectable import Select + _LOGGER = logging.getLogger("pepdbagent") @@ -157,7 +158,7 @@ def _get_single_annotation( _LOGGER.info(f"Getting annotation of the project: '{namespace}/{name}:{tag}'") admin_tuple = tuple_converter(admin) - query = select( + statement = select( Projects.namespace, Projects.name, Projects.tag, @@ -180,7 +181,7 @@ def _get_single_annotation( ) with Session(self._sa_engine) as session: - query_result = session.execute(query).first() + query_result = session.execute(statement).first() if len(query_result) > 0: annot = AnnotationModel( @@ -219,11 +220,8 @@ def _count_projects( if admin is None: admin = [] statement = select(func.count()).select_from(Projects) - statement = self._add_where_clause( - statement, - namespace=namespace, - search_str=search_str, - admin_list=admin, + statement = self._add_condition( + statement, namespace=namespace, search_str=search_str, admin_list=admin ) with Session(self._sa_engine) as session: @@ -275,13 +273,14 @@ def _get_projects( Projects.digest, ).select_from(Projects) - statement = self._add_where_clause( + statement = self._add_condition( statement, namespace=namespace, search_str=search_str, admin_list=admin ) statement = self._add_order_by_keyword(statement, by=order_by, desc=order_desc) + statement = statement.limit(limit).offset(offset) with Session(self._sa_engine) as session: - query_results = session.execute(statement.limit(limit).offset(offset)).all() + query_results = session.execute(statement).all() results_list = [] for result in query_results: @@ -337,7 +336,7 @@ def _add_order_by_keyword( return statement.order_by(order_by_obj) @staticmethod - def _add_where_clause( + def _add_condition( statement: Select, namespace: str = None, search_str: str = None, diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 10ee891..552e726 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -3,6 +3,7 @@ from sqlalchemy import Engine, and_, delete, distinct, func, insert, or_, select, update from sqlalchemy.orm import Session +from sqlalchemy.sql.selectable import Select from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET from pepdbagent.db_utils import Projects @@ -89,17 +90,13 @@ def _get_namespace( .select_from(Projects) ) - if search_str: - sql_search_str = f"%{search_str}%" - statement = statement.where( - or_( - Projects.namespace.ilike(sql_search_str), - ) - ) - statement = statement.where( - or_(Projects.private.is_(False), Projects.namespace.in_(admin_nsp)) + statement = self._add_condition( + statement=statement, + search_str=search_str, + admin_list=admin_nsp, ) statement = statement.limit(limit).offset(offset) + with Session(self._sa_engine) as session: query_results = session.execute(statement).all() @@ -125,6 +122,30 @@ def _count_namespace(self, search_str: str = None, admin_nsp: tuple = None) -> i statement = select( func.count(distinct(Projects.namespace)).label("number_of_namespaces") ).select_from(Projects) + statement = self._add_condition( + statement=statement, + search_str=search_str, + admin_list=admin_nsp, + ) + with Session(self._sa_engine) as session: + query_results = session.execute(statement).first() + + return query_results.number_of_namespaces + + @staticmethod + def _add_condition( + statement: Select, + search_str: str = None, + admin_list: Union[str, List[str]] = None, + ) -> Select: + """ + Add where clause to sqlalchemy statement (in namespace search) + + :param statement: sqlalchemy representation of a SELECT statement. + :param search_str: search string that has to be found namespace + :param admin_list: list or string of admin rights to namespace + :return: sqlalchemy representation of a SELECT statement with where clause. + """ if search_str: sql_search_str = f"%{search_str}%" statement = statement.where( @@ -133,9 +154,6 @@ def _count_namespace(self, search_str: str = None, admin_nsp: tuple = None) -> i ) ) statement = statement.where( - or_(Projects.private.is_(False), Projects.namespace.in_(admin_nsp)) + or_(Projects.private.is_(False), Projects.namespace.in_(admin_list)) ) - with Session(self._sa_engine) as session: - query_results = session.execute(statement).first() - - return query_results.number_of_namespaces + return statement diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 9a7ca0e..557b091 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -53,7 +53,7 @@ def get( _subsample_dict: dict } """ - name = name.lower() + # name = name.lower() namespace = namespace.lower() with Session(self._sa_engine) as session: found_prj = session.execute( @@ -126,7 +126,7 @@ def delete( :param tag: Tag :return: None """ - name = name.lower() + # name = name.lower() namespace = namespace.lower() with self._sa_engine as engine: engine.execute( From 4be89685d49a7a02f18b19276fe407dee787ea3a Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 2 Jun 2023 16:51:53 -0400 Subject: [PATCH 16/32] fixed #82 --- pepdbagent/db_utils.py | 7 +++---- pepdbagent/modules/project.py | 12 ++++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index e674d6b..bed184a 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -11,6 +11,7 @@ String, event, select, + TIMESTAMP, ) from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.engine import URL, create_engine @@ -44,7 +45,7 @@ def compile_jsonb_pg(type_, compiler, **kw): class Base(DeclarativeBase): - pass + type_annotation_map = {datetime.datetime: TIMESTAMP(timezone=True)} @event.listens_for(Base.metadata, "after_create") @@ -53,11 +54,9 @@ def receive_after_create(target, connection, tables, **kw): listen for the 'after_create' event """ if tables: - _LOGGER.warning("A table was created") - print("A table was created") + _LOGGER.info("A table was created") else: _LOGGER.info("A table was not created") - print("A table was not created") class Projects(Base): diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 557b091..2855b24 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -229,8 +229,12 @@ def create( project_value=proj_dict, number_of_samples=number_of_samples, private=is_private, - submission_date=datetime.datetime.now(), - last_update_date=datetime.datetime.now(), + submission_date=datetime.datetime.now( + datetime.timezone.utc + ), + last_update_date=datetime.datetime.now( + datetime.timezone.utc + ), ) ) @@ -293,7 +297,7 @@ def _overwrite( project_value=project_dict, number_of_samples=number_of_samples, private=private, - last_update_date=datetime.datetime.now(), + last_update_date=datetime.datetime.now(datetime.timezone.utc), ) .where( and_( @@ -384,7 +388,7 @@ def __create_update_dict(update_values: UpdateItems) -> dict: digest=create_digest( update_values.project_value.to_dict(extended=True) ), - last_update_date=datetime.datetime.now(), + last_update_date=datetime.datetime.now(datetime.timezone.utc), number_of_samples=len(update_values.project_value.samples), ) From 5df8385c45d01402b54770b1ad858895142320e4 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Sat, 3 Jun 2023 18:17:04 -0400 Subject: [PATCH 17/32] restructured sa statement execute callers --- pepdbagent/db_utils.py | 7 +++- pepdbagent/exceptions.py | 5 +++ pepdbagent/modules/annotation.py | 24 +++++------- pepdbagent/modules/namespace.py | 20 +++++----- pepdbagent/modules/project.py | 65 ++++++++++++++++---------------- pepdbagent/pepdbagent.py | 11 +++--- 6 files changed, 68 insertions(+), 64 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index bed184a..20de2ed 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -25,7 +25,7 @@ ) from pepdbagent.const import POSTGRES_DIALECT -from pepdbagent.exceptions import SchemaError +from pepdbagent.exceptions import SchemaError, RecordNotFoundError _LOGGER = logging.getLogger("pepdbagent") @@ -145,8 +145,13 @@ def session_execute(self, statement: Select) -> Result: SQLAlchemy's SQL expression language :return: query result represented with declarative base """ + _LOGGER.debug(f"Executing statement: {statement}") with Session(self._engine) as session: query_result = session.execute(statement) + + if not query_result.one_or_none(): + _LOGGER.error(f"Record with provided conditions not found") + raise RecordNotFoundError("Record with provided conditions not found") return query_result @property diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py index afa7f2c..8c40661 100644 --- a/pepdbagent/exceptions.py +++ b/pepdbagent/exceptions.py @@ -28,3 +28,8 @@ def __init__(self, msg=""): class ProjectUniqueNameError(PEPDatabaseAgentError): def __init__(self, msg=""): super().__init__(f"""{msg}""") + + +class RecordNotFoundError(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""{msg}""") diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index f0b0248..57d4c2c 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -4,11 +4,10 @@ from sqlalchemy import Engine, func, select from sqlalchemy import and_, or_ from sqlalchemy.exc import IntegrityError -from sqlalchemy.orm import Session from sqlalchemy.sql.selectable import Select from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET, DEFAULT_TAG -from pepdbagent.db_utils import Projects +from pepdbagent.db_utils import Projects, BaseEngine from pepdbagent.exceptions import ProjectNotFoundError, RegistryPathError from pepdbagent.models import AnnotationList, AnnotationModel from pepdbagent.utils import registry_path_converter, tuple_converter @@ -24,11 +23,12 @@ class PEPDatabaseAnnotation: While using this class, user can retrieve all necessary metadata about PEPs """ - def __init__(self, engine: Engine): + def __init__(self, pep_db_engine: BaseEngine): """ - :param engine: Connection to db represented by sqlalchemy engine + :param pep_db_engine: pepdbengine object with sa engine """ - self._sa_engine = engine + self._sa_engine = pep_db_engine.engine + self._pep_db_engine = pep_db_engine def get( self, @@ -179,9 +179,7 @@ def _get_single_annotation( ), ) ) - - with Session(self._sa_engine) as session: - query_result = session.execute(statement).first() + query_result = self._pep_db_engine.session_execute(statement).first() if len(query_result) > 0: annot = AnnotationModel( @@ -223,9 +221,7 @@ def _count_projects( statement = self._add_condition( statement, namespace=namespace, search_str=search_str, admin_list=admin ) - - with Session(self._sa_engine) as session: - result = session.execute(statement).first() + result = self._pep_db_engine.session_execute(statement).first() try: return result[0] @@ -279,8 +275,7 @@ def _get_projects( statement = self._add_order_by_keyword(statement, by=order_by, desc=order_desc) statement = statement.limit(limit).offset(offset) - with Session(self._sa_engine) as session: - query_results = session.execute(statement).all() + query_results = self._pep_db_engine.session_execute(statement).all() results_list = [] for result in query_results: @@ -390,8 +385,7 @@ def get_project_number_in_namespace( or_(Projects.private.is_(False), Projects.namespace.in_(admin)) ) - with Session(self._sa_engine) as session: - result = session.execute(statement).first() + result = self._pep_db_engine.session_execute(statement).first() try: return result[0] diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 552e726..a6070e2 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -1,12 +1,11 @@ import logging from typing import List, Union -from sqlalchemy import Engine, and_, delete, distinct, func, insert, or_, select, update -from sqlalchemy.orm import Session +from sqlalchemy import distinct, func, or_, select from sqlalchemy.sql.selectable import Select from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET -from pepdbagent.db_utils import Projects +from pepdbagent.db_utils import Projects, BaseEngine from pepdbagent.models import Namespace, NamespaceList from pepdbagent.utils import tuple_converter @@ -20,11 +19,12 @@ class PEPDatabaseNamespace: While using this class, user can retrieve all necessary metadata about PEPs """ - def __init__(self, engine: Engine): + def __init__(self, pep_db_engine: BaseEngine): """ - :param engine: Connection to db represented by sqlalchemy engine + :param pep_db_engine: pepdbengine object with sa engine """ - self._sa_engine = engine + self._sa_engine = pep_db_engine.engine + self._pep_db_engine = pep_db_engine def get( self, @@ -96,9 +96,7 @@ def _get_namespace( admin_list=admin_nsp, ) statement = statement.limit(limit).offset(offset) - - with Session(self._sa_engine) as session: - query_results = session.execute(statement).all() + query_results = self._pep_db_engine.session_execute(statement).all() results_list = [] for res in query_results: @@ -127,8 +125,8 @@ def _count_namespace(self, search_str: str = None, admin_nsp: tuple = None) -> i search_str=search_str, admin_list=admin_nsp, ) - with Session(self._sa_engine) as session: - query_results = session.execute(statement).first() + + query_results = self._pep_db_engine.session_execute(statement).first() return query_results.number_of_namespaces diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 2855b24..f9f2f74 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -6,14 +6,14 @@ import peppy from sqlalchemy import Engine, and_, delete, insert, or_, select, update from sqlalchemy.exc import IntegrityError -from sqlalchemy.orm import Session from pepdbagent.const import * -from pepdbagent.db_utils import Projects +from pepdbagent.db_utils import Projects, BaseEngine from pepdbagent.exceptions import ProjectNotFoundError, ProjectUniqueNameError from pepdbagent.models import UpdateItems, UpdateModel from pepdbagent.utils import create_digest, registry_path_converter + _LOGGER = logging.getLogger("pepdbagent") @@ -24,11 +24,12 @@ class PEPDatabaseProject: While using this class, user can retrieve projects from database """ - def __init__(self, engine: Engine): + def __init__(self, pep_db_engine: BaseEngine): """ - :param engine: Connection to db represented by sqlalchemy engine + :param pep_db_engine: pepdbengine object with sa engine """ - self._sa_engine = engine + self._sa_engine = pep_db_engine.engine + self._pep_db_engine = pep_db_engine def get( self, @@ -55,21 +56,22 @@ def get( """ # name = name.lower() namespace = namespace.lower() - with Session(self._sa_engine) as session: - found_prj = session.execute( - select( - Projects.namespace, - Projects.name, - Projects.project_value, - Projects.private, - ).where( - and_( - Projects.namespace == namespace, - Projects.name == name, - Projects.tag == tag, - ) - ) - ).one() + statement = select( + Projects.namespace, + Projects.name, + Projects.project_value, + Projects.private, + ) + + statement = statement.where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ) + ) + + found_prj = self._pep_db_engine.session_execute(statement).one() if found_prj: _LOGGER.info( @@ -422,18 +424,17 @@ def exists( :param tag: project tag :return: Returning True if project exist """ - with Session(self._sa_engine) as session: - found_prj = session.execute( - select( - Projects.id, - ).where( - and_( - Projects.namespace == namespace, - Projects.name == name, - Projects.tag == tag, - ) - ) - ).all() + + statement = select(Projects.id) + statement = statement.where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ) + ) + found_prj = self._pep_db_engine.session_execute(statement).all() + if len(found_prj) > 0: return True else: diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index 85a5188..e73500a 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -30,7 +30,7 @@ def __init__( (e.g. "localhost://username:password@pdp_db:5432") """ - sa_engine = BaseEngine( + pep_db_engine = BaseEngine( host=host, port=port, database=database, @@ -39,13 +39,14 @@ def __init__( drivername=drivername, dsn=dsn, echo=echo, - ).engine + ) + sa_engine = pep_db_engine.engine self.__sa_engine = sa_engine - self.__project = PEPDatabaseProject(sa_engine) - self.__annotation = PEPDatabaseAnnotation(sa_engine) - self.__namespace = PEPDatabaseNamespace(sa_engine) + self.__project = PEPDatabaseProject(pep_db_engine) + self.__annotation = PEPDatabaseAnnotation(pep_db_engine) + self.__namespace = PEPDatabaseNamespace(pep_db_engine) self.__db_name = database From a6d43ef4437f694329d1348f15a66c1d33f19f7c Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Sat, 3 Jun 2023 18:35:54 -0400 Subject: [PATCH 18/32] fixed executions --- pepdbagent/db_utils.py | 7 ++----- pepdbagent/exceptions.py | 5 ----- pepdbagent/modules/annotation.py | 8 ++++---- pepdbagent/modules/namespace.py | 4 ++-- pepdbagent/modules/project.py | 20 ++++++++++---------- 5 files changed, 18 insertions(+), 26 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 20de2ed..940c351 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -25,7 +25,7 @@ ) from pepdbagent.const import POSTGRES_DIALECT -from pepdbagent.exceptions import SchemaError, RecordNotFoundError +from pepdbagent.exceptions import SchemaError _LOGGER = logging.getLogger("pepdbagent") @@ -137,7 +137,7 @@ def create_schema(self, engine=None): Base.metadata.create_all(engine) return None - def session_execute(self, statement: Select) -> Result: + def session_execute_first(self, statement: Select) -> Result: """ Execute statement using sqlalchemy statement @@ -149,9 +149,6 @@ def session_execute(self, statement: Select) -> Result: with Session(self._engine) as session: query_result = session.execute(statement) - if not query_result.one_or_none(): - _LOGGER.error(f"Record with provided conditions not found") - raise RecordNotFoundError("Record with provided conditions not found") return query_result @property diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py index 8c40661..afa7f2c 100644 --- a/pepdbagent/exceptions.py +++ b/pepdbagent/exceptions.py @@ -28,8 +28,3 @@ def __init__(self, msg=""): class ProjectUniqueNameError(PEPDatabaseAgentError): def __init__(self, msg=""): super().__init__(f"""{msg}""") - - -class RecordNotFoundError(PEPDatabaseAgentError): - def __init__(self, msg=""): - super().__init__(f"""{msg}""") diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index 57d4c2c..f68b74c 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -179,7 +179,7 @@ def _get_single_annotation( ), ) ) - query_result = self._pep_db_engine.session_execute(statement).first() + query_result = self._pep_db_engine.session_execute_first(statement).first() if len(query_result) > 0: annot = AnnotationModel( @@ -221,7 +221,7 @@ def _count_projects( statement = self._add_condition( statement, namespace=namespace, search_str=search_str, admin_list=admin ) - result = self._pep_db_engine.session_execute(statement).first() + result = self._pep_db_engine.session_execute_first(statement).first() try: return result[0] @@ -275,7 +275,7 @@ def _get_projects( statement = self._add_order_by_keyword(statement, by=order_by, desc=order_desc) statement = statement.limit(limit).offset(offset) - query_results = self._pep_db_engine.session_execute(statement).all() + query_results = self._pep_db_engine.session_execute_first(statement).all() results_list = [] for result in query_results: @@ -385,7 +385,7 @@ def get_project_number_in_namespace( or_(Projects.private.is_(False), Projects.namespace.in_(admin)) ) - result = self._pep_db_engine.session_execute(statement).first() + result = self._pep_db_engine.session_execute_first(statement).first() try: return result[0] diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index a6070e2..19d4f6a 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -96,7 +96,7 @@ def _get_namespace( admin_list=admin_nsp, ) statement = statement.limit(limit).offset(offset) - query_results = self._pep_db_engine.session_execute(statement).all() + query_results = self._pep_db_engine.session_execute_first(statement).all() results_list = [] for res in query_results: @@ -126,7 +126,7 @@ def _count_namespace(self, search_str: str = None, admin_nsp: tuple = None) -> i admin_list=admin_nsp, ) - query_results = self._pep_db_engine.session_execute(statement).first() + query_results = self._pep_db_engine.session_execute_first(statement).first() return query_results.number_of_namespaces diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index f9f2f74..59c337f 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -71,7 +71,7 @@ def get( ) ) - found_prj = self._pep_db_engine.session_execute(statement).one() + found_prj = self._pep_db_engine.session_execute_first(statement).one() if found_prj: _LOGGER.info( @@ -130,8 +130,8 @@ def delete( """ # name = name.lower() namespace = namespace.lower() - with self._sa_engine as engine: - engine.execute( + with self._sa_engine.begin() as conn: + conn.execute( delete(Projects).where( and_( Projects.namespace == namespace, @@ -221,8 +221,8 @@ def create( try: _LOGGER.info(f"Uploading {namespace}/{proj_name}:{tag} project...") - with self._sa_engine.begin() as eng: - eng.execute( + with self._sa_engine.begin() as conn: + conn.execute( insert(Projects).values( namespace=namespace, name=proj_name, @@ -288,8 +288,8 @@ def _overwrite( namespace = namespace.lower() if self.exists(namespace=namespace, name=proj_name, tag=tag): _LOGGER.info(f"Updating {proj_name} project...") - with self._sa_engine.begin() as engine: - engine.execute( + with self._sa_engine.begin() as conn: + conn.execute( update(Projects) .values( namespace=namespace, @@ -363,8 +363,8 @@ def update( .values(update_values) ) - with self._sa_engine.begin() as engine: - engine.execute(update_stmt) + with self._sa_engine.begin() as conn: + conn.execute(update_stmt) return None @@ -433,7 +433,7 @@ def exists( Projects.tag == tag, ) ) - found_prj = self._pep_db_engine.session_execute(statement).all() + found_prj = self._pep_db_engine.session_execute_first(statement).all() if len(found_prj) > 0: return True From 842133cdc58da041db46ebf299acf1f1f96a3907 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Sat, 3 Jun 2023 18:37:02 -0400 Subject: [PATCH 19/32] fixed naming --- pepdbagent/db_utils.py | 2 +- pepdbagent/modules/annotation.py | 8 ++++---- pepdbagent/modules/namespace.py | 4 ++-- pepdbagent/modules/project.py | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 940c351..8cf7f2f 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -137,7 +137,7 @@ def create_schema(self, engine=None): Base.metadata.create_all(engine) return None - def session_execute_first(self, statement: Select) -> Result: + def session_execute(self, statement: Select) -> Result: """ Execute statement using sqlalchemy statement diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index f68b74c..57d4c2c 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -179,7 +179,7 @@ def _get_single_annotation( ), ) ) - query_result = self._pep_db_engine.session_execute_first(statement).first() + query_result = self._pep_db_engine.session_execute(statement).first() if len(query_result) > 0: annot = AnnotationModel( @@ -221,7 +221,7 @@ def _count_projects( statement = self._add_condition( statement, namespace=namespace, search_str=search_str, admin_list=admin ) - result = self._pep_db_engine.session_execute_first(statement).first() + result = self._pep_db_engine.session_execute(statement).first() try: return result[0] @@ -275,7 +275,7 @@ def _get_projects( statement = self._add_order_by_keyword(statement, by=order_by, desc=order_desc) statement = statement.limit(limit).offset(offset) - query_results = self._pep_db_engine.session_execute_first(statement).all() + query_results = self._pep_db_engine.session_execute(statement).all() results_list = [] for result in query_results: @@ -385,7 +385,7 @@ def get_project_number_in_namespace( or_(Projects.private.is_(False), Projects.namespace.in_(admin)) ) - result = self._pep_db_engine.session_execute_first(statement).first() + result = self._pep_db_engine.session_execute(statement).first() try: return result[0] diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 19d4f6a..a6070e2 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -96,7 +96,7 @@ def _get_namespace( admin_list=admin_nsp, ) statement = statement.limit(limit).offset(offset) - query_results = self._pep_db_engine.session_execute_first(statement).all() + query_results = self._pep_db_engine.session_execute(statement).all() results_list = [] for res in query_results: @@ -126,7 +126,7 @@ def _count_namespace(self, search_str: str = None, admin_nsp: tuple = None) -> i admin_list=admin_nsp, ) - query_results = self._pep_db_engine.session_execute_first(statement).first() + query_results = self._pep_db_engine.session_execute(statement).first() return query_results.number_of_namespaces diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 59c337f..19aba37 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -71,7 +71,7 @@ def get( ) ) - found_prj = self._pep_db_engine.session_execute_first(statement).one() + found_prj = self._pep_db_engine.session_execute(statement).one() if found_prj: _LOGGER.info( @@ -433,7 +433,7 @@ def exists( Projects.tag == tag, ) ) - found_prj = self._pep_db_engine.session_execute_first(statement).all() + found_prj = self._pep_db_engine.session_execute(statement).all() if len(found_prj) > 0: return True From 325584ff9098b6d69a02005e30ee61d0afe38163 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 5 Jun 2023 13:17:43 -0400 Subject: [PATCH 20/32] db utils update --- Makefile | 14 ++++++++++++++ pepdbagent/db_utils.py | 35 +++++++---------------------------- 2 files changed, 21 insertions(+), 28 deletions(-) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..067ac8a --- /dev/null +++ b/Makefile @@ -0,0 +1,14 @@ +lint: + # black should be last in the list, as it lint the code. Tests can fail if order will be different + flake8 && isort . && black . + +run-coverage: + coverage run -m pytest + +html-report: + coverage html + +open-coverage: + cd htmlcov && google-chrome index.html + +coverage: run-coverage html-report open-coverage \ No newline at end of file diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 8cf7f2f..4f062e5 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -72,7 +72,7 @@ class Projects(Base): number_of_samples: Mapped[int] submission_date: Mapped[datetime.datetime] last_update_date: Mapped[datetime.datetime] - # schema: Mapped[Optional[str]] + schema: Mapped[Optional[str]] __table_args__ = (PrimaryKeyConstraint("namespace", "name", "tag", name="id"),) @@ -118,12 +118,7 @@ def __init__( self._engine = create_engine(dsn, echo=echo) self.create_schema(self._engine) - - session = Session(self._engine) - try: - session.execute(select(Projects).limit(1)).first() - except ProgrammingError: - raise SchemaError() + self.check_db_connection() def create_schema(self, engine=None): """ @@ -171,24 +166,8 @@ def _start_session(self): return session - @staticmethod - def _create_dsn_string( - host: str = "localhost", - port: int = 5432, - database: str = "pep-db", - user: str = None, - password: str = None, - dialect: str = POSTGRES_DIALECT, - ) -> str: - """ - Using host, port, database, user, and password and dialect - - :param host: database server address e.g., localhost or an IP address. - :param port: the port number that defaults to 5432 if it is not provided. - :param database: the name of the database that you want to connect. - :param user: the username used to authenticate. - :param password: password used to authenticate. - :param dialect: DB dialect, specific implementation or variant of a database system. [Default: postgresql] - :return: sqlalchemy connection string - """ - return f"{dialect}://{user}:{password}@{host}:{port}/{database}" + def check_db_connection(self): + try: + self.session_execute(select(Projects).limit(1)) + except ProgrammingError: + raise SchemaError() From 642bf4b4c29612391b82a2098bca06a01b49bcec Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 5 Jun 2023 13:45:19 -0400 Subject: [PATCH 21/32] added new way of testing --- .github/workflows/new_db.yml | 48 ++ pepdbagent/db_utils.py | 2 +- pepdbagent/modules/annotation.py | 4 +- pepdbagent/modules/project.py | 11 +- requirements/requirements-all.txt | 1 + sample_pep/old/project_config.yaml | 25 - sample_pep/old/sample_table.csv | 23 - .../subtable_automerge/sample_table.csv | 5 - .../subtable_automerge/subsample_table.csv | 7 - tests/conftest.py | 94 ++- tests/data/basic_pep/project_config.yaml | 2 - tests/data/basic_pep/sample_table.csv | 3 - .../BiocProject/data/laminB1Lads.bed | 0 .../BiocProject/data/vistaEnhancers.bed | 0 .../BiocProject/project_config.yaml | 0 .../BiocProject/project_config_resize.yaml | 0 .../namespace1}/BiocProject/readBedFiles.R | 0 .../BiocProject/readBedFiles_resize.R | 0 .../namespace1}/BiocProject/sample_table.csv | 4 +- .../amendments1/project_config.yaml | 0 .../namespace1/amendments1}/sample_table.csv | 8 +- .../amendments1/sample_table_newLib.csv | 0 .../amendments1/sample_table_newLib2.csv | 0 .../amendments1/sample_table_pre.csv | 0 .../amendments2/project_config.yaml | 0 .../namespace1}/amendments2/sample_table.csv | 12 +- .../amendments2/sample_table_noFrog.csv | 8 +- .../amendments2/sample_table_pre.csv | 0 .../namespace1}/append/project_config.yaml | 0 .../data/namespace1/append}/sample_table.csv | 8 +- .../namespace1}/append/sample_table_pre.csv | 8 +- .../namespace1}/automerge/project_config.yaml | 0 .../namespace1}/automerge/sample_table.csv | 0 .../namespace1}/basic/project_config.yaml | 0 .../data/namespace1}/basic/sample_table.csv | 0 .../project_config.yaml | 0 .../readBedFilesExceptions.R | 0 .../BiocProject_exceptions/sample_table.csv | 4 +- .../BiocProject_remote/project_config.yaml | 0 .../project_config_resize.yaml | 0 .../BiocProject_remote/readRemoteData.R | 0 .../readRemoteData_resize.R | 0 .../BiocProject_remote/sample_table.csv | 0 .../custom_index/project_config.yaml | 0 .../namespace2}/custom_index/sample_table.csv | 0 .../namespace2}/derive/project_config.yaml | 0 .../data/namespace2/derive}/sample_table.csv | 8 +- .../namespace2}/derive/sample_table_pre.csv | 0 .../derive_imply/project_config.yaml | 0 .../namespace2}/derive_imply/sample_table.csv | 0 .../derive_imply/sample_table_pre.csv | 0 .../namespace2}/duplicate/project_config.yaml | 0 .../namespace2/duplicate}/sample_table.csv | 10 +- .../namespace2}/imply/project_config.yaml | 0 .../data/namespace2}/imply/sample_table.csv | 0 .../namespace2}/imply/sample_table_pre.csv | 0 .../namespace2}/imports/project_config.yaml | 0 .../namespace2}/imports/project_config1.yaml | 0 .../data/namespace2}/imports/sample_table.csv | 0 .../node_alias/project_config.yaml | 0 .../node_alias/project_config1.yaml | 0 .../namespace3}/node_alias/sample_table.csv | 0 .../namespace3}/noname/project_config.yaml | 0 .../noname/project_config_noname.yaml | 0 .../data/namespace3}/noname/sample_table.csv | 10 +- .../namespace3}/piface/annotation_sheet.csv | 6 +- .../namespace3}/piface/output_schema.yaml | 0 .../piface/output_schema_project.yaml | 0 .../piface/output_schema_sample.yaml | 0 .../piface/pipeline_interface1_project.yaml | 0 .../piface/pipeline_interface1_sample.yaml | 0 .../piface/pipeline_interface2_project.yaml | 0 .../piface/pipeline_interface2_sample.yaml | 0 .../namespace3}/piface/project_config.yaml | 0 .../data/namespace3}/piface/readData.R | 0 .../namespace3}/piface/resources-project.tsv | 0 .../namespace3}/piface/resources-sample.tsv | 0 .../namespace3}/remove/project_config.yaml | 0 .../data/namespace3/remove}/sample_table.csv | 8 +- .../namespace3}/subtable1/project_config.yaml | 0 .../namespace3}/subtable1/sample_table.csv | 0 .../namespace3}/subtable1/subsample_table.csv | 0 .../namespace3}/subtable2/project_config.yaml | 0 .../namespace3}/subtable2/sample_table.csv | 0 .../namespace3}/subtable2/subsample_table.csv | 0 .../namespace3}/subtable3/project_config.yaml | 0 .../namespace3}/subtable3/sample_table.csv | 0 .../namespace3}/subtable3/subsample_table.csv | 0 .../namespace3}/subtable4/project_config.yaml | 0 .../namespace3}/subtable4/sample_table.csv | 0 .../namespace3}/subtable4/subsample_table.csv | 0 .../namespace3}/subtable5/project_config.yaml | 0 .../namespace3}/subtable5/sample_table.csv | 0 .../namespace3}/subtable5/subsample_table.csv | 0 .../namespace3}/subtables/project_config.yaml | 0 .../namespace3}/subtables/sample_table.csv | 0 .../namespace3}/subtables/subsample_table.csv | 0 .../subtables/subsample_table1.csv | 0 .../project_config.yaml | 6 + .../readBedFilesExceptions.R | 15 + .../BiocProject_exceptions/sample_table.csv | 3 + .../amendments1/project_config.yaml | 16 + .../private_test/amendments1/sample_table.csv | 5 + .../amendments1/sample_table_newLib.csv | 5 + .../amendments1/sample_table_newLib2.csv | 5 + .../amendments1/sample_table_pre.csv | 5 + .../private_test/append}/project_config.yaml | 5 +- .../data/private_test/append/sample_table.csv | 5 + .../private_test/append/sample_table_pre.csv | 5 + .../private_test/derive/project_config.yaml | 10 + .../data/private_test/derive/sample_table.csv | 5 + .../private_test/derive/sample_table_pre.csv | 5 + .../private_test/remove/project_config.yaml | 12 + .../data/private_test/remove/sample_table.csv | 5 + .../subtable3/project_config.yaml | 13 + .../private_test/subtable3/sample_table.csv | 5 + .../subtable3/subsample_table.csv | 4 + tests/test_pepagent.py | 691 ++++++++---------- 118 files changed, 579 insertions(+), 565 deletions(-) create mode 100644 .github/workflows/new_db.yml delete mode 100644 sample_pep/old/project_config.yaml delete mode 100644 sample_pep/old/sample_table.csv delete mode 100644 sample_pep/subtable_automerge/sample_table.csv delete mode 100644 sample_pep/subtable_automerge/subsample_table.csv delete mode 100644 tests/data/basic_pep/project_config.yaml delete mode 100644 tests/data/basic_pep/sample_table.csv rename {sample_pep => tests/data/namespace1}/BiocProject/data/laminB1Lads.bed (100%) rename {sample_pep => tests/data/namespace1}/BiocProject/data/vistaEnhancers.bed (100%) rename {sample_pep => tests/data/namespace1}/BiocProject/project_config.yaml (100%) rename {sample_pep => tests/data/namespace1}/BiocProject/project_config_resize.yaml (100%) rename {sample_pep => tests/data/namespace1}/BiocProject/readBedFiles.R (100%) rename {sample_pep => tests/data/namespace1}/BiocProject/readBedFiles_resize.R (100%) rename {sample_pep => tests/data/namespace1}/BiocProject/sample_table.csv (97%) rename {sample_pep => tests/data/namespace1}/amendments1/project_config.yaml (100%) rename {sample_pep/derive => tests/data/namespace1/amendments1}/sample_table.csv (97%) mode change 100755 => 100644 rename {sample_pep => tests/data/namespace1}/amendments1/sample_table_newLib.csv (100%) rename {sample_pep => tests/data/namespace1}/amendments1/sample_table_newLib2.csv (100%) rename {sample_pep => tests/data/namespace1}/amendments1/sample_table_pre.csv (100%) rename {sample_pep => tests/data/namespace1}/amendments2/project_config.yaml (100%) rename {sample_pep => tests/data/namespace1}/amendments2/sample_table.csv (98%) rename {sample_pep => tests/data/namespace1}/amendments2/sample_table_noFrog.csv (98%) rename {sample_pep => tests/data/namespace1}/amendments2/sample_table_pre.csv (100%) rename {sample_pep => tests/data/namespace1}/append/project_config.yaml (100%) rename {sample_pep/duplicate => tests/data/namespace1/append}/sample_table.csv (95%) rename {sample_pep => tests/data/namespace1}/append/sample_table_pre.csv (96%) rename {sample_pep => tests/data/namespace1}/automerge/project_config.yaml (100%) rename {sample_pep => tests/data/namespace1}/automerge/sample_table.csv (100%) rename {sample_pep => tests/data/namespace1}/basic/project_config.yaml (100%) rename {sample_pep => tests/data/namespace1}/basic/sample_table.csv (100%) rename {sample_pep => tests/data/namespace2}/BiocProject_exceptions/project_config.yaml (100%) rename {sample_pep => tests/data/namespace2}/BiocProject_exceptions/readBedFilesExceptions.R (100%) rename {sample_pep => tests/data/namespace2}/BiocProject_exceptions/sample_table.csv (97%) rename {sample_pep => tests/data/namespace2}/BiocProject_remote/project_config.yaml (100%) rename {sample_pep => tests/data/namespace2}/BiocProject_remote/project_config_resize.yaml (100%) rename {sample_pep => tests/data/namespace2}/BiocProject_remote/readRemoteData.R (100%) rename {sample_pep => tests/data/namespace2}/BiocProject_remote/readRemoteData_resize.R (100%) rename {sample_pep => tests/data/namespace2}/BiocProject_remote/sample_table.csv (100%) rename {sample_pep => tests/data/namespace2}/custom_index/project_config.yaml (100%) rename {sample_pep => tests/data/namespace2}/custom_index/sample_table.csv (100%) rename {sample_pep => tests/data/namespace2}/derive/project_config.yaml (100%) rename {sample_pep/remove => tests/data/namespace2/derive}/sample_table.csv (97%) mode change 100644 => 100755 rename {sample_pep => tests/data/namespace2}/derive/sample_table_pre.csv (100%) rename {sample_pep => tests/data/namespace2}/derive_imply/project_config.yaml (100%) rename {sample_pep => tests/data/namespace2}/derive_imply/sample_table.csv (100%) rename {sample_pep => tests/data/namespace2}/derive_imply/sample_table_pre.csv (100%) rename {sample_pep => tests/data/namespace2}/duplicate/project_config.yaml (100%) rename {sample_pep/append => tests/data/namespace2/duplicate}/sample_table.csv (94%) rename {sample_pep => tests/data/namespace2}/imply/project_config.yaml (100%) rename {sample_pep => tests/data/namespace2}/imply/sample_table.csv (100%) rename {sample_pep => tests/data/namespace2}/imply/sample_table_pre.csv (100%) rename {sample_pep => tests/data/namespace2}/imports/project_config.yaml (100%) rename {sample_pep => tests/data/namespace2}/imports/project_config1.yaml (100%) rename {sample_pep => tests/data/namespace2}/imports/sample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/node_alias/project_config.yaml (100%) rename {sample_pep => tests/data/namespace3}/node_alias/project_config1.yaml (100%) rename {sample_pep => tests/data/namespace3}/node_alias/sample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/noname/project_config.yaml (100%) rename {sample_pep => tests/data/namespace3}/noname/project_config_noname.yaml (100%) rename {sample_pep => tests/data/namespace3}/noname/sample_table.csv (94%) rename {sample_pep => tests/data/namespace3}/piface/annotation_sheet.csv (98%) rename {sample_pep => tests/data/namespace3}/piface/output_schema.yaml (100%) rename {sample_pep => tests/data/namespace3}/piface/output_schema_project.yaml (100%) rename {sample_pep => tests/data/namespace3}/piface/output_schema_sample.yaml (100%) rename {sample_pep => tests/data/namespace3}/piface/pipeline_interface1_project.yaml (100%) rename {sample_pep => tests/data/namespace3}/piface/pipeline_interface1_sample.yaml (100%) rename {sample_pep => tests/data/namespace3}/piface/pipeline_interface2_project.yaml (100%) rename {sample_pep => tests/data/namespace3}/piface/pipeline_interface2_sample.yaml (100%) rename {sample_pep => tests/data/namespace3}/piface/project_config.yaml (100%) rename {sample_pep => tests/data/namespace3}/piface/readData.R (100%) rename {sample_pep => tests/data/namespace3}/piface/resources-project.tsv (100%) rename {sample_pep => tests/data/namespace3}/piface/resources-sample.tsv (100%) rename {sample_pep => tests/data/namespace3}/remove/project_config.yaml (100%) rename {sample_pep/amendments1 => tests/data/namespace3/remove}/sample_table.csv (97%) rename {sample_pep => tests/data/namespace3}/subtable1/project_config.yaml (100%) rename {sample_pep => tests/data/namespace3}/subtable1/sample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/subtable1/subsample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/subtable2/project_config.yaml (100%) rename {sample_pep => tests/data/namespace3}/subtable2/sample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/subtable2/subsample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/subtable3/project_config.yaml (100%) rename {sample_pep => tests/data/namespace3}/subtable3/sample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/subtable3/subsample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/subtable4/project_config.yaml (100%) rename {sample_pep => tests/data/namespace3}/subtable4/sample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/subtable4/subsample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/subtable5/project_config.yaml (100%) rename {sample_pep => tests/data/namespace3}/subtable5/sample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/subtable5/subsample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/subtables/project_config.yaml (100%) rename {sample_pep => tests/data/namespace3}/subtables/sample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/subtables/subsample_table.csv (100%) rename {sample_pep => tests/data/namespace3}/subtables/subsample_table1.csv (100%) create mode 100644 tests/data/private_test/BiocProject_exceptions/project_config.yaml create mode 100644 tests/data/private_test/BiocProject_exceptions/readBedFilesExceptions.R create mode 100644 tests/data/private_test/BiocProject_exceptions/sample_table.csv create mode 100644 tests/data/private_test/amendments1/project_config.yaml create mode 100644 tests/data/private_test/amendments1/sample_table.csv create mode 100644 tests/data/private_test/amendments1/sample_table_newLib.csv create mode 100644 tests/data/private_test/amendments1/sample_table_newLib2.csv create mode 100644 tests/data/private_test/amendments1/sample_table_pre.csv rename {sample_pep/subtable_automerge => tests/data/private_test/append}/project_config.yaml (50%) create mode 100644 tests/data/private_test/append/sample_table.csv create mode 100644 tests/data/private_test/append/sample_table_pre.csv create mode 100644 tests/data/private_test/derive/project_config.yaml create mode 100755 tests/data/private_test/derive/sample_table.csv create mode 100755 tests/data/private_test/derive/sample_table_pre.csv create mode 100644 tests/data/private_test/remove/project_config.yaml create mode 100644 tests/data/private_test/remove/sample_table.csv create mode 100644 tests/data/private_test/subtable3/project_config.yaml create mode 100644 tests/data/private_test/subtable3/sample_table.csv create mode 100644 tests/data/private_test/subtable3/subsample_table.csv diff --git a/.github/workflows/new_db.yml b/.github/workflows/new_db.yml new file mode 100644 index 0000000..3c53b5e --- /dev/null +++ b/.github/workflows/new_db.yml @@ -0,0 +1,48 @@ +name: Test bedstat pipeline + +on: + push: + branches: [master, dev, sqlalchemy_testing] + pull_request: + branches: [master, dev] + +jobs: + pytest: + strategy: + matrix: + python-version: [3.11] + os: [ubuntu-latest] # can't use macOS when using service containers or container jobs + r: [release] + runs-on: ${{ matrix.os }} + services: + postgres: + image: postgres + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: docker + POSTGRES_DB: pep-db + POSTGRES_HOST: localhost + ports: + - 5432:5432 + options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dev dependancies + run: if [ -f requirements/requirements-dev.txt ]; then pip install -r requirements/requirements-dev.txt; fi + + - name: Install test dependancies + run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi + + - name: Install package + run: python -m pip install . + + - name: Run pytest tests + run: pytest tests -x -vv + + diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 4f062e5..d45635e 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -72,7 +72,7 @@ class Projects(Base): number_of_samples: Mapped[int] submission_date: Mapped[datetime.datetime] last_update_date: Mapped[datetime.datetime] - schema: Mapped[Optional[str]] + # schema: Mapped[Optional[str]] __table_args__ = (PrimaryKeyConstraint("namespace", "name", "tag", name="id"),) diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index 57d4c2c..ff9e12a 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -292,7 +292,8 @@ def _get_projects( digest=result.digest, ) ) - + if not order_desc: + results_list.reverse() return results_list @staticmethod @@ -346,6 +347,7 @@ def _add_condition( :param admin_list: list or string of admin rights to namespace :return: sqlalchemy representation of a SELECT statement with where clause. """ + admin_list = tuple_converter(admin_list) if search_str: sql_search_str = f"%{search_str}%" search_query = or_( diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 19aba37..d705bdc 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -130,6 +130,11 @@ def delete( """ # name = name.lower() namespace = namespace.lower() + + if not self.exists(namespace=namespace, name=name, tag=tag): + raise ProjectNotFoundError( + f"Can't delete unexciting project: '{namespace}/{name}:{tag}'." + ) with self._sa_engine.begin() as conn: conn.execute( delete(Projects).where( @@ -143,11 +148,6 @@ def delete( _LOGGER.info(f"Project '{namespace}/{name}:{tag} was successfully deleted'") - if not self.exists(namespace=namespace, name=name, tag=tag): - raise ProjectNotFoundError( - f"Can't delete unexciting project: '{namespace}/{name}:{tag}'." - ) - def delete_by_rp( self, registry_path: str, @@ -168,6 +168,7 @@ def create( name: str = None, tag: str = DEFAULT_TAG, is_private: bool = False, + # schema: str = None, overwrite: bool = False, update_only: bool = False, ) -> None: diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index e93990f..3f46706 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -5,3 +5,4 @@ ubiquerg>=0.6.2 coloredlogs>=15.0.1 pytest-mock pydantic +psycopg2 diff --git a/sample_pep/old/project_config.yaml b/sample_pep/old/project_config.yaml deleted file mode 100644 index afa01c2..0000000 --- a/sample_pep/old/project_config.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: test_pipeline - -metadata: - sample_table: "sample_table.csv" - output_dir: "$HOME" - pipeline_interfaces: "$HOME/pipeline_interface.yaml" - -derived_columns: [read1, read2] - -data_sources: - R: "$DATA/sra_fastq/{srr}.fastq.gz" - S1: "$DATA/test/fastq/{srr}.fastq.gz" - H1: "$DATA/test/fastq/{srr}_PE1.fastq.gz" - H2: "$DATA/test/fastq/{srr}_PE2.fastq.gz" - -# implied_columns: -# organism: -# human: -# genome: hg38 -# prealignments: human_rDNA -# umi_status: -# true_8: -# umi_len: 8 -# true_6: -# umi_len: 6 diff --git a/sample_pep/old/sample_table.csv b/sample_pep/old/sample_table.csv deleted file mode 100644 index cb0bfa8..0000000 --- a/sample_pep/old/sample_table.csv +++ /dev/null @@ -1,23 +0,0 @@ -sample_name,sample_desc,treatment,replicate,toggle,protocol,organism,read_type,cell_type,purpose,umi_status,umi_length,data_source,read1,read2,srr,srx,Sample_title,Sample_geo_accession,Sample_source_name_ch1,Sample_characteristics_ch1,Sample_extract_protocol_ch1,Sample_description,Sample_data_processing,Sample_platform_id,Sample_contact_name,Sample_contact_email,Sample_contact_laboratory,Sample_contact_department,Sample_contact_institute,Sample_contact_address,Sample_contact_city,Sample_contact_zip/postal_code,Sample_contact_country,Sample_instrument_model,Sample_library_selection,Sample_library_source,Sample_library_strategy,Sample_relation,gsm_id,Sample_supplementary_file_1,Sample_supplementary_file_2,Sample_series_id -K562_PRO-seq,K562 PRO-seq,none,1,1,PRO,human,SINGLE,K562,gold standard,FALSE,0,SRA,R,,SRR155431[1-2],SRX683602,K562 PRO-Seq,GSM1480327,K562 cells,pull-down substrate: streptavidin,"PRO-seq libraries were prepared as described previously (Kwak et al., 2013). Briefly, 5×10^6 nuclei were added to 2 X Nuclear Run-On (NRO) reaction mixture (10 mM Tris-HCl pH 8.0, 300 mM KCl, 1% Sarkosyl, 5 mM MgCl2, 1 mM DTT, 0.375 mM each of biotin-11-A/C/G/UTP (Perkin-Elmer), 0.8 u/µl RNase inhibitor) and incubated for 3 min at 30˚C. Nascent RNA was extracted and fragmented by base hydrolysis in 0.2 N NaOH on ice for 10~12 min, and neutralized by adding 1 X volume of 1 M Tris-HCl pH 6.8. Fragmented nascent RNA was purified using streptavidin beads, ligated with reverse 3' RNA adapter (5'p-GAUCGUCGGACUG-UAGAACUCUGAAC-/3'InvdT/), and biotin-labeled products were enriched by another round of streptavidin bead binding and extraction. For 5' end repair, the RNA products were successively treated with tobacco acid pyrophosphatase (TAP, Epicentre) and polynucleotide kinase (PNK, NEB). 5' repaired RNA was ligated to reverse 5' RNA adaptor (5'-CCUUGGCACCCGAGAAUUCCA-3') before being further purified by the third round of streptavidin bead binding and extraction. RNA was reverse transcribed using 25 pmol Illumina RP1 primer (5'-AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA-3'). The product was amplified 15 cycles and products greater than 120 bp (insert > 70 bp) were PAGE purified before being analyzed by Illumina HiSeq 2500 instrument.",This Sample represents two sequencing replicates.,Supplementary_files_format_and_content: Processed data files are bigWigs. Each entry represents the number of reads at each base.,GPL16791,"Leighton,James,Core",ljc37@cornell.edu,John T. Lis,Moleular Biology and Genetics,Cornell University,417 Biotechnology Building,Ithaca,14853,USA,Illumina HiSeq 2500,other,transcriptomic,OTHER,SRA: https://www.ncbi.nlm.nih.gov/sra?term,GSM1480327,ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM1480nnn/GSM1480327/suppl/GSM1480327_K562_PROseq_minus.bw,ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM1480nnn/GSM1480327/suppl/GSM1480327_K562_PROseq_plus.bw,GSE60456 -K562_RNA-seq_10,90% K562 PRO-seq + 10% K562 RNA-seq,none,1,1,PRO,human,SINGLE,K562,mRNA contamination; FRiF/PRiF,FALSE,0,SRA,S1,,K562_10pct_RNArc_r2,,K562 RNA spike-in 10pct RC,GSM1480327;GSM765405,K562 cells,,,,,,,,,,,,,,,,,,,,,,, -K562_RNA-seq_20,80% K562 PRO-seq + 20% K562 RNA-seq,none,1,1,PRO,human,SINGLE,K562,mRNA contamination; FRiF/PRiF,FALSE,0,SRA,S1,,K562_20pct_RNArc_r2,,K562 RNA spike-in 20pct RC,GSM1480327;GSM765405,K562 cells,,,,,,,,,,,,,,,,,,,,,,, -K562_RNA-seq_30,70% K562 PRO-seq + 30% K562 RNA-seq,none,1,1,PRO,human,SINGLE,K562,mRNA contamination; FRiF/PRiF,FALSE,0,SRA,S1,,K562_30pct_RNArc_r2,,K562 RNA spike-in 30pct RC,GSM1480327;GSM765405,K562 cells,,,,,,,,,,,,,,,,,,,,,,, -K562_RNA-seq_40,60% K562 PRO-seq + 40% K562 RNA-seq,none,1,1,PRO,human,SINGLE,K562,mRNA contamination; FRiF/PRiF,FALSE,0,SRA,S1,,K562_40pct_RNArc_r2,,K562 RNA spike-in 40pct RC,GSM1480327;GSM765405,K562 cells,,,,,,,,,,,,,,,,,,,,,,, -K562_RNA-seq_50,50% K562 PRO-seq + 50% K562 RNA-seq,none,1,1,PRO,human,SINGLE,K562,mRNA contamination; FRiF/PRiF,FALSE,0,SRA,S1,,K562_50pct_RNArc_r2,,K562 RNA spike-in 50pct RC,GSM1480327;GSM765405,K562 cells,,,,,,,,,,,,,,,,,,,,,,, -K562_RNA-seq_60,40% K562 PRO-seq + 60% K562 RNA-seq,none,1,1,PRO,human,SINGLE,K562,mRNA contamination; FRiF/PRiF,FALSE,0,SRA,S1,,K562_60pct_RNArc_r2,,K562 RNA spike-in 60pct RC,GSM1480327;GSM765405,K562 cells,,,,,,,,,,,,,,,,,,,,,,, -K562_RNA-seq_70,30% K562 PRO-seq + 70% K562 RNA-seq,none,1,1,PRO,human,SINGLE,K562,mRNA contamination; FRiF/PRiF,FALSE,0,SRA,S1,,K562_70pct_RNArc_r2,,K562 RNA spike-in 70pct RC,GSM1480327;GSM765405,K562 cells,,,,,,,,,,,,,,,,,,,,,,, -K562_RNA-seq_80,20% K562 PRO-seq + 80% K562 RNA-seq,none,1,1,PRO,human,SINGLE,K562,mRNA contamination; FRiF/PRiF,FALSE,0,SRA,S1,,K562_80pct_RNArc_r2,,K562 RNA spike-in 80pct RC,GSM1480327;GSM765405,K562 cells,,,,,,,,,,,,,,,,,,,,,,, -K562_RNA-seq_90,10% K562 PRO-seq + 90% K562 RNA-seq,none,1,1,PRO,human,SINGLE,K562,mRNA contamination; FRiF/PRiF,FALSE,0,SRA,S1,,K562_90pct_RNArc_r2,,K562 RNA spike-in 90pct RC,GSM1480327;GSM765405,K562 cells,,,,,,,,,,,,,,,,,,,,,,, -K562_GRO-seq,K562 GRO-seq,none,1,1,GRO,human,SINGLE,K562,degraded library; low pause index,FALSE,0,SRA,R,,SRR1552484,SRX682020,K562 GRO-seq,GSM1480325,K562 cells,"antibody: anti-BrU (Santa Cruz Biotechnology, sc-32323-ac, lot# I2111)","GRO-seq libraries were prepared as in Core et al. (Core, Waterfall, and Lis, 2008), with the following modifications. Trizol (Invitrogen) was used to stop the reaction instead of DNase I and proteinase K treatment. The RNA was further extracted once with acid phenol:chloroform, and once with chloroform before precipitating with 2.5 volumes of -20oC ethanol. Bead binding buffers all contained 4units/ml of SUPERaseIN (Ambion) and the following buffers were slightly modified. Bead blocking buffer: 0.25X SSPE, 1mM EDTA, 0.05% Tween, 0.1% PVP, and 1mg/ml ultrapure BSA (Ambion); Binding buffer: 0.25XSSPE, 37.5mM NaCl, 1mM EDTA, 0.05% tween; Low-salt wash buffer: 0.2X SSPE, 1mM EDTA, 0.05% Tween. High-salt wash buffer: 0.25% SSPE, 137.5mM NaCl, 1mM EDTA, 0.05% Tween. The end repair steps were modified as follows. Pelleted RNA from the first bead binding was resuspended in 20ul, and heated to 70oC for 5min, followed by incubation on ice for 2min. 1.5ul tobacco acid pyrophosphatase (TAP) buffer, 4.5ul water, 1 ul SUPERaseIn, and 1.5ul TAP (Epicentre) were then added and the reaction incubated at 37oC for 1.5 hours. 1ul 300mM MgCl2 and 1ul T4 polynucleotide Kinase (PNK) were added to the reaction for an additional protocols l 30 min. for phosphorylating the 5'-ends, 20ul T4 PNK buffer, 2ul 100mM ATP, 145ul water, 1ul SUPERaseIn, and an additional 2ul of PNK were added for 30 min at 37oC. The reaction was then stopped by addition of 20mM EDTA followed by acid phenol extraction and precipitation.",cDNA (from nascent RNA),Supplementary_files_format_and_content: Processed data files are bigWigs of each sample. Each entry represents the number of reads at each base.,GPL11154,"Leighton,James,Core",ljc37@cornell.edu,John T. Lis,Moleular Biology and Genetics,Cornell University,417 Biotechnology Building,Ithaca,14853,USA,Illumina HiSeq 2000,other,transcriptomic,OTHER,SRA: https://www.ncbi.nlm.nih.gov/sra?term,GSM1480325,ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM1480nnn/GSM1480325/suppl/GSM1480325_K562_GROseq_minus.bigWig,ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM1480nnn/GSM1480325/suppl/GSM1480325_K562_GROseq_plus.bigWig,GSE60456 -HelaS3_GRO-seq,HelaS3 GRO-seq,none,1,1,GRO,human,SINGLE,HelaS3,low pause index; high mRNA contamination; no adapter distribution; poor TSS enrichment; high rRNA abundance,FALSE,0,SRA,R,,SRR169361[1-2],SRX796411,HelaS3_GRO-seq,GSM1558746,HelaS3_cells,cell type: HelaS3,"For 5’GRO-seq, immunoprecipitated RNA was dephosphorylated with calf intestinal phosphatase (NEB). Then 5′ capped fragments were de-capped with tobacco acid pyrophosphatase (Epicentre). Illumina TruSeq adapters were ligated to the RNA 3′ and 5′ ends with truncated mutant RNA ligase 2 (K227Q) and RNA ligase 1 (NEB), respectively. Reverse transcription was performed with Superscript III (Invitrogen) followed by PCR amplification for 12 cycles. Final libraries were size selected on PAGE/TBE gels to 175–225 bp. GRO-seq was essentially performed as 5’GRO-seq but the immunoprecipitated RNA was directly de-capped with tobacco acid pyrophosphatase (Epicentre) and subsequently kinased with PNK (NEB) prior to adapter ligation.",run-on RNA,Supplementary_files_format_and_content: bedGraph files are split by strand and scores represent the number of read 5'ends that aligned at that coordinate. .bed cluster file represent 5'-GRO-seq peak coordinates where column 3 is a unique identifier and column 4 is the coordinate of the position in the cluster containing the most reads (cluster mode),GPL16791,"Scott,Allen,Lacadie",scott.lacadie@mdc-berlin.de,Ohler,Berlin Institute for Medical Systems Biology,Max Delbrück Center for Molecular Medicine,Robert-Rössle-Str. 10,Berlin-Buch,13092,Germany,Illumina HiSeq 2500,cDNA,transcriptomic,RNA-Seq,SRA: https://www.ncbi.nlm.nih.gov/sra?term,GSM1558746,ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM1558nnn/GSM1558746/suppl/GSM1558746_GRO-seq_signal_minus.bedGraph.gz,ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM1558nnn/GSM1558746/suppl/GSM1558746_GRO-seq_signal_plus.bedGraph.gz,GSE63872 -Jurkat_ChRO-seq_1,Jurkat ChRO-seq,none,1,1,PRO,human,SINGLE,Jurkat,degraded library,true_6,6,SRA,R,,SRR7616133,SRX4480700,Jurkat chromatin run-on 1 [ChRO-seq],GSM3309956,Jurkat T-cells,batch: 1,We prepared PRO-seq libraries as described in Kwak et. al. (2013) Science.,Jurkat chromatin run-on,Supplementary_files_format_and_content: *bw: Mapped reads are provided in bigWig format.,GPL18573,"Charles,G,Danko",dankoc@gmail.com,Danko Lab,Baker Institute,Cornell University,Hungerford Hill Rd,Ithaca,14853,USA,Illumina NextSeq 500,other,transcriptomic,OTHER,SRA: https://www.ncbi.nlm.nih.gov/sra?term,GSM3309956,ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3309nnn/GSM3309956/suppl/GSM3309956_5587_5598_24205_HGC2FBGXX_J_CHR_TGACCA_R1_minus.bw,ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3309nnn/GSM3309956/suppl/GSM3309956_5587_5598_24205_HGC2FBGXX_J_CHR_TGACCA_R1_plus.bw,GSE117832 -Jurkat_ChRO-seq_2,Jurkat ChRO-seq,none,2,1,PRO,human,SINGLE,Jurkat,intact library,true_6,6,SRA,R,,SRR7616134,SRX4480701,Jurkat chromatin run-on 2 [ChRO-seq],GSM3309957,Jurkat T-cells,batch: 2,We prepared PRO-seq libraries as described in Kwak et. al. (2013) Science.,Jurkat chromatin run-on,Supplementary_files_format_and_content: *bw: Mapped reads are provided in bigWig format.,GPL18573,"Charles,G,Danko",dankoc@gmail.com,Danko Lab,Baker Institute,Cornell University,Hungerford Hill Rd,Ithaca,14853,USA,Illumina NextSeq 500,other,transcriptomic,OTHER,SRA: https://www.ncbi.nlm.nih.gov/sra?term,GSM3309957,ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3309nnn/GSM3309957/suppl/GSM3309957_Jurkat_ChRO_NoRNase_minus.bw,ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3309nnn/GSM3309957/suppl/GSM3309957_Jurkat_ChRO_NoRNase_plus.bw,GSE117832 -HEK_PRO-seq,"HEK w/ osTIR1, ZNF143AID PRO-seq",Auxin,1,1,PRO,human,PAIRED,HEK293T,very intact RNA,true_8,8,SRA,H1,H2,SRR8608074,SRX5407488,HEK_TIR1_ZNF143AID_Auxin_rep4,GSM3618147,HEK-293T cells,cell line: HEK-293T,,,,,,,,,,,,,,,,,,,GSM3618147,,, -HEK_ARF_PRO-seq,"HEK w/ osTIR1, ZNF143AID, ARF PRO-seq",Auxin,1,1,PRO,human,PAIRED,HEK293T,very degraded library (post Pol liberation),true_8,8,SRA,H1,H2,SRR8608070,SRX5407484,HEK_TIR1_ZNF143AID_ARF_Auxin_rep4,GSM3618143,HEK-293T cells,cell line: HEK-293T,,,,,,,,,,,,,,,,,,,,,, -H9_PRO-seq_1,H9 PRO-seq,DMSO,1,1,PRO,human,PAIRED,H9,low pause index; differential analysis,true_8,8,SRA,H1,H2,H9_DMSO_rep1,,H9_DMSO_rep1,GSM4214080,H9 cells,,,,,,,,,,,,,,,,,,,,,,, -H9_PRO-seq_2,H9 PRO-seq,DMSO,2,1,PRO,human,PAIRED,H9,low pause index; differential analysis,true_8,8,SRA,H1,H2,H9_DMSO_rep2,,H9_DMSO_rep2,GSM4214081,H9 cells,,,,,,,,,,,,,,,,,,,,,,, -H9_PRO-seq_3,H9 PRO-seq,DMSO,3,1,PRO,human,PAIRED,H9,low pause index; differential analysis,true_8,8,SRA,H1,H2,H9_DMSO_rep3,,H9_DMSO_rep3,GSM4214082,H9 cells,,,,,,,,,,,,,,,,,,,,,,, -H9_treated_PRO-seq_1,H9 treated PRO-seq,200 nM romidepsin,1,1,PRO,human,PAIRED,H9,high pause index; differential analysis,true_8,8,SRA,H1,H2,H9_200nM_romidepsin_rep1,,H9_200nM_romidepsin_rep1,GSM4214083,H9 cells,,,,,,,,,,,,,,,,,,,,,,, -H9_treated_PRO-seq_2,H9 treated PRO-seq,200 nM romidepsin,2,1,PRO,human,PAIRED,H9,high pause index; differential analysis,true_8,8,SRA,H1,H2,H9_200nM_romidepsin_rep2,,H9_200nM_romidepsin_rep2,GSM4214084,H9 cells,,,,,,,,,,,,,,,,,,,,,,, -H9_treated_PRO-seq_3,H9 treated PRO-seq,200 nM romidepsin,3,1,PRO,human,PAIRED,H9,high pause index; differential analysis,true_8,8,SRA,H1,H2,H9_200nM_romidepsin_rep3,,H9_200nM_romidepsin_rep3,GSM4214085,H9 cells,,,,,,,,,,,,,,,,,,,,,,, diff --git a/sample_pep/subtable_automerge/sample_table.csv b/sample_pep/subtable_automerge/sample_table.csv deleted file mode 100644 index e8ebc20..0000000 --- a/sample_pep/subtable_automerge/sample_table.csv +++ /dev/null @@ -1,5 +0,0 @@ -sample_name,protocol,file -frog_1,anySampleType,data/frog1a_data.txt -frog_2,anySampleType,data/frog2a_data.txt -frog_3,anySampleType,data/frog3a_data.txt -frog_3,anySampleType,data/frog3b_data.txt diff --git a/sample_pep/subtable_automerge/subsample_table.csv b/sample_pep/subtable_automerge/subsample_table.csv deleted file mode 100644 index ef6b036..0000000 --- a/sample_pep/subtable_automerge/subsample_table.csv +++ /dev/null @@ -1,7 +0,0 @@ -sample_name,subsample_name,file -frog_1,sub_a,data/frog1a_data.txt -frog_1,sub_b,data/frog1b_data.txt -frog_1,sub_c,data/frog1c_data.txt -frog_2,sub_a,data/frog2a_data.txt -frog_2,sub_b,data/frog2b_data.txt -frog_3,sub_c,data/frog3c_data.txt diff --git a/tests/conftest.py b/tests/conftest.py index 5fe5a0b..625400c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,59 +1,57 @@ import peppy import pytest +import os +from sqlalchemy import create_engine +from sqlalchemy import text -@pytest.fixture -def sql_output_for_check_conn_db(): - return [ - (None, None, None, "private"), - (None, None, None, "digest"), - (None, None, None, "id"), - (None, None, None, "name"), - (None, None, None, "namespace"), - (None, None, None, "project_value"), - (None, None, None, "tag"), - (None, None, None, "number_of_samples"), - (None, None, None, "submission_date"), - (None, None, None, "last_update_date"), - ] +DNS = f"postgresql://postgres:docker@localhost:5432/pep-db" +from pepdbagent import PEPDatabaseAgent -@pytest.fixture -def test_dsn(): - return "postgresql://postgres:docker@localhost:5432/pep-base-sql" +DATA_PATH = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "tests", + "data", +) -@pytest.fixture -def test_peppy_project(): - return peppy.Project("tests/data/basic_pep/project_config.yaml") +def get_path_to_example_file(namespace, project_name): + return os.path.join(DATA_PATH, namespace, project_name, "project_config.yaml") @pytest.fixture -def test_database_project_return(): - return [ - 15, - { - "name": "public_project", - "_config": { - "pep_version": "2.0.0", - "sample_table": "/home/cgf8xr/databio/repos/example_peps/example_basic/sample_table.csv", - }, - "description": None, - "_sample_dict": { - "file": { - "frog_1": "data/frog1_data.txt", - "frog_2": "data/frog2_data.txt", - }, - "protocol": {"frog_1": "anySampleType", "frog_2": "anySampleType"}, - "sample_name": {"frog_1": "frog_1", "frog_2": "frog_2"}, - }, - "_subsample_dict": None, - }, - { - "status": "Unknown", - "n_samples": 2, - "is_private": False, - "description": None, - "last_update": "2022-10-24 12:24:24.210667", - }, - ] +def list_of_available_peps(): + pep_namespaces = os.listdir(DATA_PATH) + projects = {} + for np in pep_namespaces: + pep_name = os.listdir(os.path.join(DATA_PATH, np)) + projects[np] = {p: get_path_to_example_file(np, p) for p in pep_name} + return projects + + +@pytest.fixture(scope="function") +def initiate_pepdb_con( + list_of_available_peps, +): + sa_engine = create_engine(DNS) + with sa_engine.begin() as conn: + conn.execute(text("DROP table IF EXISTS projects")) + pepdb_con = PEPDatabaseAgent(dsn=DNS, echo=True) + for namespace, item in list_of_available_peps.items(): + if namespace == "private_test": + private = True + else: + private = False + for name, path in item.items(): + prj = peppy.Project(path) + pepdb_con.project.create( + namespace=namespace, + name=name, + tag="default", + is_private=private, + project=prj, + overwrite=True, + ) + + yield pepdb_con diff --git a/tests/data/basic_pep/project_config.yaml b/tests/data/basic_pep/project_config.yaml deleted file mode 100644 index 609c7a2..0000000 --- a/tests/data/basic_pep/project_config.yaml +++ /dev/null @@ -1,2 +0,0 @@ -pep_version: "2.0.0" -sample_table: sample_table.csv diff --git a/tests/data/basic_pep/sample_table.csv b/tests/data/basic_pep/sample_table.csv deleted file mode 100644 index d709d5e..0000000 --- a/tests/data/basic_pep/sample_table.csv +++ /dev/null @@ -1,3 +0,0 @@ -sample_name,protocol,file -frog_1,anySampleType,data/frog1_data.txt -frog_2,anySampleType,data/frog2_data.txt diff --git a/sample_pep/BiocProject/data/laminB1Lads.bed b/tests/data/namespace1/BiocProject/data/laminB1Lads.bed similarity index 100% rename from sample_pep/BiocProject/data/laminB1Lads.bed rename to tests/data/namespace1/BiocProject/data/laminB1Lads.bed diff --git a/sample_pep/BiocProject/data/vistaEnhancers.bed b/tests/data/namespace1/BiocProject/data/vistaEnhancers.bed similarity index 100% rename from sample_pep/BiocProject/data/vistaEnhancers.bed rename to tests/data/namespace1/BiocProject/data/vistaEnhancers.bed diff --git a/sample_pep/BiocProject/project_config.yaml b/tests/data/namespace1/BiocProject/project_config.yaml similarity index 100% rename from sample_pep/BiocProject/project_config.yaml rename to tests/data/namespace1/BiocProject/project_config.yaml diff --git a/sample_pep/BiocProject/project_config_resize.yaml b/tests/data/namespace1/BiocProject/project_config_resize.yaml similarity index 100% rename from sample_pep/BiocProject/project_config_resize.yaml rename to tests/data/namespace1/BiocProject/project_config_resize.yaml diff --git a/sample_pep/BiocProject/readBedFiles.R b/tests/data/namespace1/BiocProject/readBedFiles.R similarity index 100% rename from sample_pep/BiocProject/readBedFiles.R rename to tests/data/namespace1/BiocProject/readBedFiles.R diff --git a/sample_pep/BiocProject/readBedFiles_resize.R b/tests/data/namespace1/BiocProject/readBedFiles_resize.R similarity index 100% rename from sample_pep/BiocProject/readBedFiles_resize.R rename to tests/data/namespace1/BiocProject/readBedFiles_resize.R diff --git a/sample_pep/BiocProject/sample_table.csv b/tests/data/namespace1/BiocProject/sample_table.csv similarity index 97% rename from sample_pep/BiocProject/sample_table.csv rename to tests/data/namespace1/BiocProject/sample_table.csv index 59de6e5..497b905 100644 --- a/sample_pep/BiocProject/sample_table.csv +++ b/tests/data/namespace1/BiocProject/sample_table.csv @@ -1,3 +1,3 @@ -sample_name,file_path -laminB1Lads,data/laminB1Lads.bed +sample_name,file_path +laminB1Lads,data/laminB1Lads.bed vistaEnhancers,data/vistaEnhancers.bed diff --git a/sample_pep/amendments1/project_config.yaml b/tests/data/namespace1/amendments1/project_config.yaml similarity index 100% rename from sample_pep/amendments1/project_config.yaml rename to tests/data/namespace1/amendments1/project_config.yaml diff --git a/sample_pep/derive/sample_table.csv b/tests/data/namespace1/amendments1/sample_table.csv old mode 100755 new mode 100644 similarity index 97% rename from sample_pep/derive/sample_table.csv rename to tests/data/namespace1/amendments1/sample_table.csv index bcfd9bd..9b2b752 --- a/sample_pep/derive/sample_table.csv +++ b/tests/data/namespace1/amendments1/sample_table.csv @@ -1,5 +1,5 @@ -sample_name,protocol,organism,time,file_path -pig_0h,RRBS,pig,0,source1 -pig_1h,RRBS,pig,1,source1 -frog_0h,RRBS,frog,0,source1 +sample_name,protocol,organism,time,file_path +pig_0h,RRBS,pig,0,source1 +pig_1h,RRBS,pig,1,source1 +frog_0h,RRBS,frog,0,source1 frog_1h,RRBS,frog,1,source1 diff --git a/sample_pep/amendments1/sample_table_newLib.csv b/tests/data/namespace1/amendments1/sample_table_newLib.csv similarity index 100% rename from sample_pep/amendments1/sample_table_newLib.csv rename to tests/data/namespace1/amendments1/sample_table_newLib.csv diff --git a/sample_pep/amendments1/sample_table_newLib2.csv b/tests/data/namespace1/amendments1/sample_table_newLib2.csv similarity index 100% rename from sample_pep/amendments1/sample_table_newLib2.csv rename to tests/data/namespace1/amendments1/sample_table_newLib2.csv diff --git a/sample_pep/amendments1/sample_table_pre.csv b/tests/data/namespace1/amendments1/sample_table_pre.csv similarity index 100% rename from sample_pep/amendments1/sample_table_pre.csv rename to tests/data/namespace1/amendments1/sample_table_pre.csv diff --git a/sample_pep/amendments2/project_config.yaml b/tests/data/namespace1/amendments2/project_config.yaml similarity index 100% rename from sample_pep/amendments2/project_config.yaml rename to tests/data/namespace1/amendments2/project_config.yaml diff --git a/sample_pep/amendments2/sample_table.csv b/tests/data/namespace1/amendments2/sample_table.csv similarity index 98% rename from sample_pep/amendments2/sample_table.csv rename to tests/data/namespace1/amendments2/sample_table.csv index 2aa31e1..ffea777 100644 --- a/sample_pep/amendments2/sample_table.csv +++ b/tests/data/namespace1/amendments2/sample_table.csv @@ -1,7 +1,7 @@ -sample_name,organism,time,file_path,protocol -frog_0h,frog,0,data/lab/project/frog_0h.fastq,ATAC-seq -frog_1h,frog,1,data/lab/project/frog_1h.fastq,ATAC-seq -human_1h,human,1,data/lab/project/human_1h.fastq,ATAC-seq -human_0h,human,0,data/lab/project/human_0h.fastq,ATAC-seq -mouse_1h,mouse,1,data/lab/project/mouse_1h.fastq,ATAC-seq +sample_name,organism,time,file_path,protocol +frog_0h,frog,0,data/lab/project/frog_0h.fastq,ATAC-seq +frog_1h,frog,1,data/lab/project/frog_1h.fastq,ATAC-seq +human_1h,human,1,data/lab/project/human_1h.fastq,ATAC-seq +human_0h,human,0,data/lab/project/human_0h.fastq,ATAC-seq +mouse_1h,mouse,1,data/lab/project/mouse_1h.fastq,ATAC-seq mouse_0h,mouse,0,data/lab/project/mouse_1h.fastq,ATAC-seq diff --git a/sample_pep/amendments2/sample_table_noFrog.csv b/tests/data/namespace1/amendments2/sample_table_noFrog.csv similarity index 98% rename from sample_pep/amendments2/sample_table_noFrog.csv rename to tests/data/namespace1/amendments2/sample_table_noFrog.csv index 7ee8b57..d5e2282 100644 --- a/sample_pep/amendments2/sample_table_noFrog.csv +++ b/tests/data/namespace1/amendments2/sample_table_noFrog.csv @@ -1,5 +1,5 @@ -sample_name,organism,time,file_path,protocol -human_1h,human,1,data/lab/project/human_1h.fastq,ATAC-seq -human_0h,human,0,data/lab/project/human_0h.fastq,ATAC-seq -mouse_1h,mouse,1,data/lab/project/mouse_1h.fastq,ATAC-seq +sample_name,organism,time,file_path,protocol +human_1h,human,1,data/lab/project/human_1h.fastq,ATAC-seq +human_0h,human,0,data/lab/project/human_0h.fastq,ATAC-seq +mouse_1h,mouse,1,data/lab/project/mouse_1h.fastq,ATAC-seq mouse_0h,mouse,0,data/lab/project/mouse_1h.fastq,ATAC-seq diff --git a/sample_pep/amendments2/sample_table_pre.csv b/tests/data/namespace1/amendments2/sample_table_pre.csv similarity index 100% rename from sample_pep/amendments2/sample_table_pre.csv rename to tests/data/namespace1/amendments2/sample_table_pre.csv diff --git a/sample_pep/append/project_config.yaml b/tests/data/namespace1/append/project_config.yaml similarity index 100% rename from sample_pep/append/project_config.yaml rename to tests/data/namespace1/append/project_config.yaml diff --git a/sample_pep/duplicate/sample_table.csv b/tests/data/namespace1/append/sample_table.csv similarity index 95% rename from sample_pep/duplicate/sample_table.csv rename to tests/data/namespace1/append/sample_table.csv index b3e480d..6436c91 100644 --- a/sample_pep/duplicate/sample_table.csv +++ b/tests/data/namespace1/append/sample_table.csv @@ -1,5 +1,5 @@ -sample_name,organism,time -pig_0h,pig,0 -pig_1h,pig,1 -frog_0h,frog,0 +sample_name,organism,time +pig_0h,pig,0 +pig_1h,pig,1 +frog_0h,frog,0 frog_1h,frog,1 diff --git a/sample_pep/append/sample_table_pre.csv b/tests/data/namespace1/append/sample_table_pre.csv similarity index 96% rename from sample_pep/append/sample_table_pre.csv rename to tests/data/namespace1/append/sample_table_pre.csv index e645040..aa92f1b 100644 --- a/sample_pep/append/sample_table_pre.csv +++ b/tests/data/namespace1/append/sample_table_pre.csv @@ -1,5 +1,5 @@ -sample_name,organism,time,read_type -pig_0h,pig,0,SINGLE -pig_1h,pig,1,SINGLE -frog_0h,frog,0,SINGLE +sample_name,organism,time,read_type +pig_0h,pig,0,SINGLE +pig_1h,pig,1,SINGLE +frog_0h,frog,0,SINGLE frog_1h,frog,1,SINGLE diff --git a/sample_pep/automerge/project_config.yaml b/tests/data/namespace1/automerge/project_config.yaml similarity index 100% rename from sample_pep/automerge/project_config.yaml rename to tests/data/namespace1/automerge/project_config.yaml diff --git a/sample_pep/automerge/sample_table.csv b/tests/data/namespace1/automerge/sample_table.csv similarity index 100% rename from sample_pep/automerge/sample_table.csv rename to tests/data/namespace1/automerge/sample_table.csv diff --git a/sample_pep/basic/project_config.yaml b/tests/data/namespace1/basic/project_config.yaml similarity index 100% rename from sample_pep/basic/project_config.yaml rename to tests/data/namespace1/basic/project_config.yaml diff --git a/sample_pep/basic/sample_table.csv b/tests/data/namespace1/basic/sample_table.csv similarity index 100% rename from sample_pep/basic/sample_table.csv rename to tests/data/namespace1/basic/sample_table.csv diff --git a/sample_pep/BiocProject_exceptions/project_config.yaml b/tests/data/namespace2/BiocProject_exceptions/project_config.yaml similarity index 100% rename from sample_pep/BiocProject_exceptions/project_config.yaml rename to tests/data/namespace2/BiocProject_exceptions/project_config.yaml diff --git a/sample_pep/BiocProject_exceptions/readBedFilesExceptions.R b/tests/data/namespace2/BiocProject_exceptions/readBedFilesExceptions.R similarity index 100% rename from sample_pep/BiocProject_exceptions/readBedFilesExceptions.R rename to tests/data/namespace2/BiocProject_exceptions/readBedFilesExceptions.R diff --git a/sample_pep/BiocProject_exceptions/sample_table.csv b/tests/data/namespace2/BiocProject_exceptions/sample_table.csv similarity index 97% rename from sample_pep/BiocProject_exceptions/sample_table.csv rename to tests/data/namespace2/BiocProject_exceptions/sample_table.csv index 59de6e5..497b905 100644 --- a/sample_pep/BiocProject_exceptions/sample_table.csv +++ b/tests/data/namespace2/BiocProject_exceptions/sample_table.csv @@ -1,3 +1,3 @@ -sample_name,file_path -laminB1Lads,data/laminB1Lads.bed +sample_name,file_path +laminB1Lads,data/laminB1Lads.bed vistaEnhancers,data/vistaEnhancers.bed diff --git a/sample_pep/BiocProject_remote/project_config.yaml b/tests/data/namespace2/BiocProject_remote/project_config.yaml similarity index 100% rename from sample_pep/BiocProject_remote/project_config.yaml rename to tests/data/namespace2/BiocProject_remote/project_config.yaml diff --git a/sample_pep/BiocProject_remote/project_config_resize.yaml b/tests/data/namespace2/BiocProject_remote/project_config_resize.yaml similarity index 100% rename from sample_pep/BiocProject_remote/project_config_resize.yaml rename to tests/data/namespace2/BiocProject_remote/project_config_resize.yaml diff --git a/sample_pep/BiocProject_remote/readRemoteData.R b/tests/data/namespace2/BiocProject_remote/readRemoteData.R similarity index 100% rename from sample_pep/BiocProject_remote/readRemoteData.R rename to tests/data/namespace2/BiocProject_remote/readRemoteData.R diff --git a/sample_pep/BiocProject_remote/readRemoteData_resize.R b/tests/data/namespace2/BiocProject_remote/readRemoteData_resize.R similarity index 100% rename from sample_pep/BiocProject_remote/readRemoteData_resize.R rename to tests/data/namespace2/BiocProject_remote/readRemoteData_resize.R diff --git a/sample_pep/BiocProject_remote/sample_table.csv b/tests/data/namespace2/BiocProject_remote/sample_table.csv similarity index 100% rename from sample_pep/BiocProject_remote/sample_table.csv rename to tests/data/namespace2/BiocProject_remote/sample_table.csv diff --git a/sample_pep/custom_index/project_config.yaml b/tests/data/namespace2/custom_index/project_config.yaml similarity index 100% rename from sample_pep/custom_index/project_config.yaml rename to tests/data/namespace2/custom_index/project_config.yaml diff --git a/sample_pep/custom_index/sample_table.csv b/tests/data/namespace2/custom_index/sample_table.csv similarity index 100% rename from sample_pep/custom_index/sample_table.csv rename to tests/data/namespace2/custom_index/sample_table.csv diff --git a/sample_pep/derive/project_config.yaml b/tests/data/namespace2/derive/project_config.yaml similarity index 100% rename from sample_pep/derive/project_config.yaml rename to tests/data/namespace2/derive/project_config.yaml diff --git a/sample_pep/remove/sample_table.csv b/tests/data/namespace2/derive/sample_table.csv old mode 100644 new mode 100755 similarity index 97% rename from sample_pep/remove/sample_table.csv rename to tests/data/namespace2/derive/sample_table.csv index bcfd9bd..9b2b752 --- a/sample_pep/remove/sample_table.csv +++ b/tests/data/namespace2/derive/sample_table.csv @@ -1,5 +1,5 @@ -sample_name,protocol,organism,time,file_path -pig_0h,RRBS,pig,0,source1 -pig_1h,RRBS,pig,1,source1 -frog_0h,RRBS,frog,0,source1 +sample_name,protocol,organism,time,file_path +pig_0h,RRBS,pig,0,source1 +pig_1h,RRBS,pig,1,source1 +frog_0h,RRBS,frog,0,source1 frog_1h,RRBS,frog,1,source1 diff --git a/sample_pep/derive/sample_table_pre.csv b/tests/data/namespace2/derive/sample_table_pre.csv similarity index 100% rename from sample_pep/derive/sample_table_pre.csv rename to tests/data/namespace2/derive/sample_table_pre.csv diff --git a/sample_pep/derive_imply/project_config.yaml b/tests/data/namespace2/derive_imply/project_config.yaml similarity index 100% rename from sample_pep/derive_imply/project_config.yaml rename to tests/data/namespace2/derive_imply/project_config.yaml diff --git a/sample_pep/derive_imply/sample_table.csv b/tests/data/namespace2/derive_imply/sample_table.csv similarity index 100% rename from sample_pep/derive_imply/sample_table.csv rename to tests/data/namespace2/derive_imply/sample_table.csv diff --git a/sample_pep/derive_imply/sample_table_pre.csv b/tests/data/namespace2/derive_imply/sample_table_pre.csv similarity index 100% rename from sample_pep/derive_imply/sample_table_pre.csv rename to tests/data/namespace2/derive_imply/sample_table_pre.csv diff --git a/sample_pep/duplicate/project_config.yaml b/tests/data/namespace2/duplicate/project_config.yaml similarity index 100% rename from sample_pep/duplicate/project_config.yaml rename to tests/data/namespace2/duplicate/project_config.yaml diff --git a/sample_pep/append/sample_table.csv b/tests/data/namespace2/duplicate/sample_table.csv similarity index 94% rename from sample_pep/append/sample_table.csv rename to tests/data/namespace2/duplicate/sample_table.csv index 331a990..6436c91 100644 --- a/sample_pep/append/sample_table.csv +++ b/tests/data/namespace2/duplicate/sample_table.csv @@ -1,5 +1,5 @@ -sample_name,organism,time -pig_0h,pig,0 -pig_1h,pig,1 -frog_0h,frog,0 -frog_1h,frog,1 +sample_name,organism,time +pig_0h,pig,0 +pig_1h,pig,1 +frog_0h,frog,0 +frog_1h,frog,1 diff --git a/sample_pep/imply/project_config.yaml b/tests/data/namespace2/imply/project_config.yaml similarity index 100% rename from sample_pep/imply/project_config.yaml rename to tests/data/namespace2/imply/project_config.yaml diff --git a/sample_pep/imply/sample_table.csv b/tests/data/namespace2/imply/sample_table.csv similarity index 100% rename from sample_pep/imply/sample_table.csv rename to tests/data/namespace2/imply/sample_table.csv diff --git a/sample_pep/imply/sample_table_pre.csv b/tests/data/namespace2/imply/sample_table_pre.csv similarity index 100% rename from sample_pep/imply/sample_table_pre.csv rename to tests/data/namespace2/imply/sample_table_pre.csv diff --git a/sample_pep/imports/project_config.yaml b/tests/data/namespace2/imports/project_config.yaml similarity index 100% rename from sample_pep/imports/project_config.yaml rename to tests/data/namespace2/imports/project_config.yaml diff --git a/sample_pep/imports/project_config1.yaml b/tests/data/namespace2/imports/project_config1.yaml similarity index 100% rename from sample_pep/imports/project_config1.yaml rename to tests/data/namespace2/imports/project_config1.yaml diff --git a/sample_pep/imports/sample_table.csv b/tests/data/namespace2/imports/sample_table.csv similarity index 100% rename from sample_pep/imports/sample_table.csv rename to tests/data/namespace2/imports/sample_table.csv diff --git a/sample_pep/node_alias/project_config.yaml b/tests/data/namespace3/node_alias/project_config.yaml similarity index 100% rename from sample_pep/node_alias/project_config.yaml rename to tests/data/namespace3/node_alias/project_config.yaml diff --git a/sample_pep/node_alias/project_config1.yaml b/tests/data/namespace3/node_alias/project_config1.yaml similarity index 100% rename from sample_pep/node_alias/project_config1.yaml rename to tests/data/namespace3/node_alias/project_config1.yaml diff --git a/sample_pep/node_alias/sample_table.csv b/tests/data/namespace3/node_alias/sample_table.csv similarity index 100% rename from sample_pep/node_alias/sample_table.csv rename to tests/data/namespace3/node_alias/sample_table.csv diff --git a/sample_pep/noname/project_config.yaml b/tests/data/namespace3/noname/project_config.yaml similarity index 100% rename from sample_pep/noname/project_config.yaml rename to tests/data/namespace3/noname/project_config.yaml diff --git a/sample_pep/noname/project_config_noname.yaml b/tests/data/namespace3/noname/project_config_noname.yaml similarity index 100% rename from sample_pep/noname/project_config_noname.yaml rename to tests/data/namespace3/noname/project_config_noname.yaml diff --git a/sample_pep/noname/sample_table.csv b/tests/data/namespace3/noname/sample_table.csv similarity index 94% rename from sample_pep/noname/sample_table.csv rename to tests/data/namespace3/noname/sample_table.csv index 0fb897e..07477ec 100755 --- a/sample_pep/noname/sample_table.csv +++ b/tests/data/namespace3/noname/sample_table.csv @@ -1,5 +1,5 @@ -id,protocol,organism,time -1,RRBS,pig,0 -2,RRBS,pig,1 -3,RRBS,frog,0 -4,RRBS,frog,1 +id,protocol,organism,time +1,RRBS,pig,0 +2,RRBS,pig,1 +3,RRBS,frog,0 +4,RRBS,frog,1 diff --git a/sample_pep/piface/annotation_sheet.csv b/tests/data/namespace3/piface/annotation_sheet.csv similarity index 98% rename from sample_pep/piface/annotation_sheet.csv rename to tests/data/namespace3/piface/annotation_sheet.csv index 51bd5d6..2d0e126 100644 --- a/sample_pep/piface/annotation_sheet.csv +++ b/tests/data/namespace3/piface/annotation_sheet.csv @@ -1,4 +1,4 @@ -sample_name,protocol,data_source,SRR,Sample_geo_accession,read1,read2 -sample1,PROTO1,SRA,SRR5210416,GSM2471255,SRA_1,SRA_2 -sample2,PROTO1,SRA,SRR5210450,GSM2471300,SRA_1,SRA_2 +sample_name,protocol,data_source,SRR,Sample_geo_accession,read1,read2 +sample1,PROTO1,SRA,SRR5210416,GSM2471255,SRA_1,SRA_2 +sample2,PROTO1,SRA,SRR5210450,GSM2471300,SRA_1,SRA_2 sample3,PROTO2,SRA,SRR5210398,GSM2471249,SRA_1,SRA_2 diff --git a/sample_pep/piface/output_schema.yaml b/tests/data/namespace3/piface/output_schema.yaml similarity index 100% rename from sample_pep/piface/output_schema.yaml rename to tests/data/namespace3/piface/output_schema.yaml diff --git a/sample_pep/piface/output_schema_project.yaml b/tests/data/namespace3/piface/output_schema_project.yaml similarity index 100% rename from sample_pep/piface/output_schema_project.yaml rename to tests/data/namespace3/piface/output_schema_project.yaml diff --git a/sample_pep/piface/output_schema_sample.yaml b/tests/data/namespace3/piface/output_schema_sample.yaml similarity index 100% rename from sample_pep/piface/output_schema_sample.yaml rename to tests/data/namespace3/piface/output_schema_sample.yaml diff --git a/sample_pep/piface/pipeline_interface1_project.yaml b/tests/data/namespace3/piface/pipeline_interface1_project.yaml similarity index 100% rename from sample_pep/piface/pipeline_interface1_project.yaml rename to tests/data/namespace3/piface/pipeline_interface1_project.yaml diff --git a/sample_pep/piface/pipeline_interface1_sample.yaml b/tests/data/namespace3/piface/pipeline_interface1_sample.yaml similarity index 100% rename from sample_pep/piface/pipeline_interface1_sample.yaml rename to tests/data/namespace3/piface/pipeline_interface1_sample.yaml diff --git a/sample_pep/piface/pipeline_interface2_project.yaml b/tests/data/namespace3/piface/pipeline_interface2_project.yaml similarity index 100% rename from sample_pep/piface/pipeline_interface2_project.yaml rename to tests/data/namespace3/piface/pipeline_interface2_project.yaml diff --git a/sample_pep/piface/pipeline_interface2_sample.yaml b/tests/data/namespace3/piface/pipeline_interface2_sample.yaml similarity index 100% rename from sample_pep/piface/pipeline_interface2_sample.yaml rename to tests/data/namespace3/piface/pipeline_interface2_sample.yaml diff --git a/sample_pep/piface/project_config.yaml b/tests/data/namespace3/piface/project_config.yaml similarity index 100% rename from sample_pep/piface/project_config.yaml rename to tests/data/namespace3/piface/project_config.yaml diff --git a/sample_pep/piface/readData.R b/tests/data/namespace3/piface/readData.R similarity index 100% rename from sample_pep/piface/readData.R rename to tests/data/namespace3/piface/readData.R diff --git a/sample_pep/piface/resources-project.tsv b/tests/data/namespace3/piface/resources-project.tsv similarity index 100% rename from sample_pep/piface/resources-project.tsv rename to tests/data/namespace3/piface/resources-project.tsv diff --git a/sample_pep/piface/resources-sample.tsv b/tests/data/namespace3/piface/resources-sample.tsv similarity index 100% rename from sample_pep/piface/resources-sample.tsv rename to tests/data/namespace3/piface/resources-sample.tsv diff --git a/sample_pep/remove/project_config.yaml b/tests/data/namespace3/remove/project_config.yaml similarity index 100% rename from sample_pep/remove/project_config.yaml rename to tests/data/namespace3/remove/project_config.yaml diff --git a/sample_pep/amendments1/sample_table.csv b/tests/data/namespace3/remove/sample_table.csv similarity index 97% rename from sample_pep/amendments1/sample_table.csv rename to tests/data/namespace3/remove/sample_table.csv index bcfd9bd..9b2b752 100644 --- a/sample_pep/amendments1/sample_table.csv +++ b/tests/data/namespace3/remove/sample_table.csv @@ -1,5 +1,5 @@ -sample_name,protocol,organism,time,file_path -pig_0h,RRBS,pig,0,source1 -pig_1h,RRBS,pig,1,source1 -frog_0h,RRBS,frog,0,source1 +sample_name,protocol,organism,time,file_path +pig_0h,RRBS,pig,0,source1 +pig_1h,RRBS,pig,1,source1 +frog_0h,RRBS,frog,0,source1 frog_1h,RRBS,frog,1,source1 diff --git a/sample_pep/subtable1/project_config.yaml b/tests/data/namespace3/subtable1/project_config.yaml similarity index 100% rename from sample_pep/subtable1/project_config.yaml rename to tests/data/namespace3/subtable1/project_config.yaml diff --git a/sample_pep/subtable1/sample_table.csv b/tests/data/namespace3/subtable1/sample_table.csv similarity index 100% rename from sample_pep/subtable1/sample_table.csv rename to tests/data/namespace3/subtable1/sample_table.csv diff --git a/sample_pep/subtable1/subsample_table.csv b/tests/data/namespace3/subtable1/subsample_table.csv similarity index 100% rename from sample_pep/subtable1/subsample_table.csv rename to tests/data/namespace3/subtable1/subsample_table.csv diff --git a/sample_pep/subtable2/project_config.yaml b/tests/data/namespace3/subtable2/project_config.yaml similarity index 100% rename from sample_pep/subtable2/project_config.yaml rename to tests/data/namespace3/subtable2/project_config.yaml diff --git a/sample_pep/subtable2/sample_table.csv b/tests/data/namespace3/subtable2/sample_table.csv similarity index 100% rename from sample_pep/subtable2/sample_table.csv rename to tests/data/namespace3/subtable2/sample_table.csv diff --git a/sample_pep/subtable2/subsample_table.csv b/tests/data/namespace3/subtable2/subsample_table.csv similarity index 100% rename from sample_pep/subtable2/subsample_table.csv rename to tests/data/namespace3/subtable2/subsample_table.csv diff --git a/sample_pep/subtable3/project_config.yaml b/tests/data/namespace3/subtable3/project_config.yaml similarity index 100% rename from sample_pep/subtable3/project_config.yaml rename to tests/data/namespace3/subtable3/project_config.yaml diff --git a/sample_pep/subtable3/sample_table.csv b/tests/data/namespace3/subtable3/sample_table.csv similarity index 100% rename from sample_pep/subtable3/sample_table.csv rename to tests/data/namespace3/subtable3/sample_table.csv diff --git a/sample_pep/subtable3/subsample_table.csv b/tests/data/namespace3/subtable3/subsample_table.csv similarity index 100% rename from sample_pep/subtable3/subsample_table.csv rename to tests/data/namespace3/subtable3/subsample_table.csv diff --git a/sample_pep/subtable4/project_config.yaml b/tests/data/namespace3/subtable4/project_config.yaml similarity index 100% rename from sample_pep/subtable4/project_config.yaml rename to tests/data/namespace3/subtable4/project_config.yaml diff --git a/sample_pep/subtable4/sample_table.csv b/tests/data/namespace3/subtable4/sample_table.csv similarity index 100% rename from sample_pep/subtable4/sample_table.csv rename to tests/data/namespace3/subtable4/sample_table.csv diff --git a/sample_pep/subtable4/subsample_table.csv b/tests/data/namespace3/subtable4/subsample_table.csv similarity index 100% rename from sample_pep/subtable4/subsample_table.csv rename to tests/data/namespace3/subtable4/subsample_table.csv diff --git a/sample_pep/subtable5/project_config.yaml b/tests/data/namespace3/subtable5/project_config.yaml similarity index 100% rename from sample_pep/subtable5/project_config.yaml rename to tests/data/namespace3/subtable5/project_config.yaml diff --git a/sample_pep/subtable5/sample_table.csv b/tests/data/namespace3/subtable5/sample_table.csv similarity index 100% rename from sample_pep/subtable5/sample_table.csv rename to tests/data/namespace3/subtable5/sample_table.csv diff --git a/sample_pep/subtable5/subsample_table.csv b/tests/data/namespace3/subtable5/subsample_table.csv similarity index 100% rename from sample_pep/subtable5/subsample_table.csv rename to tests/data/namespace3/subtable5/subsample_table.csv diff --git a/sample_pep/subtables/project_config.yaml b/tests/data/namespace3/subtables/project_config.yaml similarity index 100% rename from sample_pep/subtables/project_config.yaml rename to tests/data/namespace3/subtables/project_config.yaml diff --git a/sample_pep/subtables/sample_table.csv b/tests/data/namespace3/subtables/sample_table.csv similarity index 100% rename from sample_pep/subtables/sample_table.csv rename to tests/data/namespace3/subtables/sample_table.csv diff --git a/sample_pep/subtables/subsample_table.csv b/tests/data/namespace3/subtables/subsample_table.csv similarity index 100% rename from sample_pep/subtables/subsample_table.csv rename to tests/data/namespace3/subtables/subsample_table.csv diff --git a/sample_pep/subtables/subsample_table1.csv b/tests/data/namespace3/subtables/subsample_table1.csv similarity index 100% rename from sample_pep/subtables/subsample_table1.csv rename to tests/data/namespace3/subtables/subsample_table1.csv diff --git a/tests/data/private_test/BiocProject_exceptions/project_config.yaml b/tests/data/private_test/BiocProject_exceptions/project_config.yaml new file mode 100644 index 0000000..a6e9e52 --- /dev/null +++ b/tests/data/private_test/BiocProject_exceptions/project_config.yaml @@ -0,0 +1,6 @@ +pep_version: "2.0.0" +sample_table: sample_table.csv + +bioconductor: + readFunName: readBedFilesExceptions + readFunPath: readBedFilesExceptions.R diff --git a/tests/data/private_test/BiocProject_exceptions/readBedFilesExceptions.R b/tests/data/private_test/BiocProject_exceptions/readBedFilesExceptions.R new file mode 100644 index 0000000..2530f9c --- /dev/null +++ b/tests/data/private_test/BiocProject_exceptions/readBedFilesExceptions.R @@ -0,0 +1,15 @@ +readBedFilesExceptions = function(project) { + warning("first test warning") + warning("second test warning") + stop("test error") + paths = pepr::sampleTable(project)$file_path + sampleNames = pepr::sampleTable(project)$sample_name + setwd(dirname(project@file)) + result = lapply(paths, function(x){ + df = read.table(x) + colnames(df) = c('chr', 'start', 'end') + gr = GenomicRanges::GRanges(df) + }) + names(result) = sampleNames + return(GenomicRanges::GRangesList(result)) +} diff --git a/tests/data/private_test/BiocProject_exceptions/sample_table.csv b/tests/data/private_test/BiocProject_exceptions/sample_table.csv new file mode 100644 index 0000000..497b905 --- /dev/null +++ b/tests/data/private_test/BiocProject_exceptions/sample_table.csv @@ -0,0 +1,3 @@ +sample_name,file_path +laminB1Lads,data/laminB1Lads.bed +vistaEnhancers,data/vistaEnhancers.bed diff --git a/tests/data/private_test/amendments1/project_config.yaml b/tests/data/private_test/amendments1/project_config.yaml new file mode 100644 index 0000000..79da9ab --- /dev/null +++ b/tests/data/private_test/amendments1/project_config.yaml @@ -0,0 +1,16 @@ +pep_version: "2.0.0" +sample_table: sample_table.csv +output_dir: $HOME/hello_looper_results + +sample_modifiers: + derive: + attributes: [file_path] + sources: + source1: /data/lab/project/{organism}_{time}h.fastq + source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq +project_modifiers: + amend: + newLib: + sample_table: sample_table_newLib.csv + newLib2: + sample_table: sample_table_newLib2.csv diff --git a/tests/data/private_test/amendments1/sample_table.csv b/tests/data/private_test/amendments1/sample_table.csv new file mode 100644 index 0000000..9b2b752 --- /dev/null +++ b/tests/data/private_test/amendments1/sample_table.csv @@ -0,0 +1,5 @@ +sample_name,protocol,organism,time,file_path +pig_0h,RRBS,pig,0,source1 +pig_1h,RRBS,pig,1,source1 +frog_0h,RRBS,frog,0,source1 +frog_1h,RRBS,frog,1,source1 diff --git a/tests/data/private_test/amendments1/sample_table_newLib.csv b/tests/data/private_test/amendments1/sample_table_newLib.csv new file mode 100644 index 0000000..f5ea5aa --- /dev/null +++ b/tests/data/private_test/amendments1/sample_table_newLib.csv @@ -0,0 +1,5 @@ +sample_name,protocol,organism,time,file_path +pig_0h,ABCD,pig,0,source1 +pig_1h,ABCD,pig,1,source1 +frog_0h,ABCD,frog,0,source1 +frog_1h,ABCD,frog,1,source1 diff --git a/tests/data/private_test/amendments1/sample_table_newLib2.csv b/tests/data/private_test/amendments1/sample_table_newLib2.csv new file mode 100644 index 0000000..71be1bc --- /dev/null +++ b/tests/data/private_test/amendments1/sample_table_newLib2.csv @@ -0,0 +1,5 @@ +sample_name,protocol,organism,time,file_path +pig_0h,EFGH,pig,0,source1 +pig_1h,EFGH,pig,1,source1 +frog_0h,EFGH,frog,0,source1 +frog_1h,EFGH,frog,1,source1 diff --git a/tests/data/private_test/amendments1/sample_table_pre.csv b/tests/data/private_test/amendments1/sample_table_pre.csv new file mode 100644 index 0000000..159fc34 --- /dev/null +++ b/tests/data/private_test/amendments1/sample_table_pre.csv @@ -0,0 +1,5 @@ +sample_name,protocol,organism,time,file_path +pig_0h,RRBS,pig,0,data/lab/project/pig_0h.fastq +pig_1h,RRBS,pig,1,data/lab/project/pig_1h.fastq +frog_0h,RRBS,frog,0,data/lab/project/frog_0h.fastq +frog_1h,RRBS,frog,1,data/lab/project/frog_1h.fastq diff --git a/sample_pep/subtable_automerge/project_config.yaml b/tests/data/private_test/append/project_config.yaml similarity index 50% rename from sample_pep/subtable_automerge/project_config.yaml rename to tests/data/private_test/append/project_config.yaml index 75834d3..fd5b0f7 100644 --- a/sample_pep/subtable_automerge/project_config.yaml +++ b/tests/data/private_test/append/project_config.yaml @@ -1,3 +1,6 @@ pep_version: "2.0.0" sample_table: sample_table.csv -subsample_table: subsample_table.csv + +sample_modifiers: + append: + read_type: SINGLE diff --git a/tests/data/private_test/append/sample_table.csv b/tests/data/private_test/append/sample_table.csv new file mode 100644 index 0000000..6436c91 --- /dev/null +++ b/tests/data/private_test/append/sample_table.csv @@ -0,0 +1,5 @@ +sample_name,organism,time +pig_0h,pig,0 +pig_1h,pig,1 +frog_0h,frog,0 +frog_1h,frog,1 diff --git a/tests/data/private_test/append/sample_table_pre.csv b/tests/data/private_test/append/sample_table_pre.csv new file mode 100644 index 0000000..aa92f1b --- /dev/null +++ b/tests/data/private_test/append/sample_table_pre.csv @@ -0,0 +1,5 @@ +sample_name,organism,time,read_type +pig_0h,pig,0,SINGLE +pig_1h,pig,1,SINGLE +frog_0h,frog,0,SINGLE +frog_1h,frog,1,SINGLE diff --git a/tests/data/private_test/derive/project_config.yaml b/tests/data/private_test/derive/project_config.yaml new file mode 100644 index 0000000..445929d --- /dev/null +++ b/tests/data/private_test/derive/project_config.yaml @@ -0,0 +1,10 @@ +pep_version: "2.0.0" +sample_table: sample_table.csv +output_dir: "$HOME/hello_looper_results" + +sample_modifiers: + derive: + attributes: [file_path] + sources: + source1: $HOME/data/lab/project/{organism}_{time}h.fastq + source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq diff --git a/tests/data/private_test/derive/sample_table.csv b/tests/data/private_test/derive/sample_table.csv new file mode 100755 index 0000000..9b2b752 --- /dev/null +++ b/tests/data/private_test/derive/sample_table.csv @@ -0,0 +1,5 @@ +sample_name,protocol,organism,time,file_path +pig_0h,RRBS,pig,0,source1 +pig_1h,RRBS,pig,1,source1 +frog_0h,RRBS,frog,0,source1 +frog_1h,RRBS,frog,1,source1 diff --git a/tests/data/private_test/derive/sample_table_pre.csv b/tests/data/private_test/derive/sample_table_pre.csv new file mode 100755 index 0000000..159fc34 --- /dev/null +++ b/tests/data/private_test/derive/sample_table_pre.csv @@ -0,0 +1,5 @@ +sample_name,protocol,organism,time,file_path +pig_0h,RRBS,pig,0,data/lab/project/pig_0h.fastq +pig_1h,RRBS,pig,1,data/lab/project/pig_1h.fastq +frog_0h,RRBS,frog,0,data/lab/project/frog_0h.fastq +frog_1h,RRBS,frog,1,data/lab/project/frog_1h.fastq diff --git a/tests/data/private_test/remove/project_config.yaml b/tests/data/private_test/remove/project_config.yaml new file mode 100644 index 0000000..7821eba --- /dev/null +++ b/tests/data/private_test/remove/project_config.yaml @@ -0,0 +1,12 @@ +pep_version: "2.0.0" +sample_table: sample_table.csv +output_dir: $HOME/hello_looper_results + +sample_modifiers: + derive: + attributes: [file_path] + sources: + source1: /data/lab/project/{organism}_{time}h.fastq + source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq + remove: + - protocol diff --git a/tests/data/private_test/remove/sample_table.csv b/tests/data/private_test/remove/sample_table.csv new file mode 100644 index 0000000..9b2b752 --- /dev/null +++ b/tests/data/private_test/remove/sample_table.csv @@ -0,0 +1,5 @@ +sample_name,protocol,organism,time,file_path +pig_0h,RRBS,pig,0,source1 +pig_1h,RRBS,pig,1,source1 +frog_0h,RRBS,frog,0,source1 +frog_1h,RRBS,frog,1,source1 diff --git a/tests/data/private_test/subtable3/project_config.yaml b/tests/data/private_test/subtable3/project_config.yaml new file mode 100644 index 0000000..a744f05 --- /dev/null +++ b/tests/data/private_test/subtable3/project_config.yaml @@ -0,0 +1,13 @@ +pep_version: "2.0.0" +sample_table: sample_table.csv +subsample_table: subsample_table.csv +looper: + output_dir: $HOME/hello_looper_results + pipeline_interfaces: [../pipeline/pipeline_interface.yaml] + +sample_modifiers: + derive: + attributes: [file] + sources: + local_files: "../data/{identifier}{file_id}_data.txt" + local_files_unmerged: "../data/{identifier}*_data.txt" diff --git a/tests/data/private_test/subtable3/sample_table.csv b/tests/data/private_test/subtable3/sample_table.csv new file mode 100644 index 0000000..c1b94a6 --- /dev/null +++ b/tests/data/private_test/subtable3/sample_table.csv @@ -0,0 +1,5 @@ +sample_name,protocol,identifier,file,file_id +frog_1,anySampleType,frog1,local_files, +frog_2,anySampleType,frog2,local_files_unmerged, +frog_3,anySampleType,frog3,local_files_unmerged, +frog_4,anySampleType,frog4,local_files_unmerged, diff --git a/tests/data/private_test/subtable3/subsample_table.csv b/tests/data/private_test/subtable3/subsample_table.csv new file mode 100644 index 0000000..24c0b45 --- /dev/null +++ b/tests/data/private_test/subtable3/subsample_table.csv @@ -0,0 +1,4 @@ +sample_name,file_id +frog_1,a +frog_1,b +frog_1,c diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 04bad40..c410e8b 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -1,380 +1,308 @@ -import datetime -import json - -import sqlalchemy import pytest -from psycopg2.errors import UniqueViolation - -from pepdbagent.models import BaseModel -from pepdbagent.pepdbagent import PEPDatabaseAgent - - -class TestBaseConnection: - """ - Test connections to the database - """ +import peppy +import os +from pepdbagent.exceptions import ProjectNotFoundError - def test_connection_initializes_correctly_from_dsn( - self, mocker, sql_output_for_check_conn_db, test_dsn - ): - mocker.patch("psycopg2.connect") - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=sql_output_for_check_conn_db, - ) +from sqlalchemy.exc import NoResultFound - c = PEPDatabaseAgent(dsn=test_dsn) +DNS = f"postgresql://postgres:docker@localhost:5432/pep-db" - assert c.connection.db_name == "pep-base-sql" - assert c.connection.pg_connection.autocommit - def test_connection_initializes_correctly_without_dsn( - self, mocker, sql_output_for_check_conn_db - ): - mocker.patch("psycopg2.connect") - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=sql_output_for_check_conn_db, - ) +DATA_PATH = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "tests", + "data", +) - c = PEPDatabaseAgent( - host="localhost", - port="5432", - database="pep-base-sql", - user="postgres", - password="docker", - ) - assert c.connection.db_name == "pep-base-sql" - assert c.connection.pg_connection.autocommit +def get_path_to_example_file(namespace, project_name): + return os.path.join(DATA_PATH, namespace, project_name, "project_config.yaml") class TestProject: - def test_upload_project_success( - selfm, mocker, sql_output_for_check_conn_db, test_dsn, test_peppy_project - ): - database_commit_mock = mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.commit_to_database" - ) - mocker.patch("psycopg2.connect") - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=sql_output_for_check_conn_db, - ) - c = PEPDatabaseAgent(dsn=test_dsn) - - test_namespace = "test" - - c.project.create(test_peppy_project, test_namespace) - - assert database_commit_mock.called - - def test_upload_project_updates_after_raising_unique_violation_error( - self, mocker, sql_output_for_check_conn_db, test_dsn, test_peppy_project - ): - update_project_mock = mocker.patch( - "pepdbagent.pepdbagent.PEPDatabaseProject._overwrite" - ) - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=sql_output_for_check_conn_db, - ) - mocker.patch("psycopg2.connect") - - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.commit_to_database", - side_effect=UniqueViolation(), - ) - - c = PEPDatabaseAgent(dsn=test_dsn) - test_namespace = "test" - c.project._overwrite(test_peppy_project, test_namespace, overwrite=True) - - assert update_project_mock.called - - def test_update_project( - self, mocker, test_dsn, test_peppy_project, sql_output_for_check_conn_db - ): - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=sql_output_for_check_conn_db, - ) - mocker.patch( - "pepdbagent.pepdbagent.PEPDatabaseProject.exists", - return_value=True, - ) - database_commit_mock = mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.commit_to_database" - ) - mocker.patch("psycopg2.connect") - - c = PEPDatabaseAgent(dsn=test_dsn) - - test_proj_dict = test_peppy_project.to_dict(extended=True) - test_proj_dict = json.dumps(test_proj_dict) - - c.project._overwrite( - test_proj_dict, - namespace="test", - proj_name="test", - tag="test", - project_digest="aaa", - number_of_samples=5, - ) - - assert database_commit_mock.called - - def test_update_item( - self, mocker, test_dsn, test_peppy_project, sql_output_for_check_conn_db - ): - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=sql_output_for_check_conn_db, - ) - mocker.patch( - "pepdbagent.pepdbagent.PEPDatabaseProject.exists", - return_value=True, - ) - database_commit_mock = mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.commit_to_database" - ) - mocker.patch("psycopg2.connect") - - c = PEPDatabaseAgent(dsn=test_dsn) - - test_peppy_project.description = "This is test description" - - c.project.update( - update_dict={ - "tag": "new_tag", - "is_private": True, - "project": test_peppy_project, - }, - namespace="test", - name="test", - tag="tag", - ) - - assert database_commit_mock.called - - def test_delete_project(self, mocker, test_dsn, sql_output_for_check_conn_db): - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=sql_output_for_check_conn_db, - ) - mocker.patch( - "pepdbagent.pepdbagent.PEPDatabaseProject.exists", - return_value=True, - ) - - database_commit_mock = mocker.patch("psycopg2.connect") - - mocker.patch("psycopg2.connect") - - c = PEPDatabaseAgent(dsn=test_dsn) - - ret = c.project.delete(namespace="test", name="test", tag="test") - - assert ret is None - - def test_get_project_by_registry_path( - self, mocker, test_dsn, sql_output_for_check_conn_db - ): - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=sql_output_for_check_conn_db, - ) - get_project_mock = mocker.patch( - "pepdbagent.pepdbagent.PEPDatabaseProject.get", - return_value=sql_output_for_check_conn_db, - ) - mocker.patch("psycopg2.connect") - - c = PEPDatabaseAgent(dsn=test_dsn) - - c.project.get_by_rp("some/project:tag") - - get_project_mock.assert_called_with( - namespace="some", name="project", tag="tag", raw=False - ) - - def test_get_project( - self, - mocker, - test_dsn, - sql_output_for_check_conn_db, - test_database_project_return, - ): - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=sql_output_for_check_conn_db, - ) - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchone", - return_value=test_database_project_return, - ) - mocker.patch("psycopg2.connect") - - c = PEPDatabaseAgent(dsn=test_dsn) - - project = c.project.get( - namespace="test_namespace", - name="test_name", - tag="test_tag", - ) - - assert project.name == "public_project" - assert not project.description + """ + Test project C + """ - def test_project_exists( - self, + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ["namespace1", "amendments2"], + ["namespace1", "basic"], + ["namespace2", "derive"], + ["namespace2", "imply"], + ["namespace3", "piface"], + ["namespace3", "subtable2"], + ], + ) + def test_get_project(self, initiate_pepdb_con, namespace, name): + kk = initiate_pepdb_con.project.get( + namespace=namespace, name=name, tag="default" + ) + ff = peppy.Project(get_path_to_example_file(namespace, name)) + assert kk == ff + + @pytest.mark.parametrize( + "namespace, name,tag", + [ + ["incorrect_namespace", "amendments1", "default"], + ["namespace1", "subtable2", "default"], + ["namespace3", "basic", "default"], + ["namespace3", "subtable2", "incorrect_tag"], + ["namespace1", "incorrect_name", "default"], + ], + ) + def test_get_project_error(self, initiate_pepdb_con, namespace, name, tag): + with pytest.raises(NoResultFound, match="No row was found"): + kk = initiate_pepdb_con.project.get(namespace=namespace, name=name, tag=tag) + + @pytest.mark.parametrize( + "namespace, name,new_name", + [ + ["namespace1", "amendments1", "name1"], + ["namespace1", "amendments2", "name2"], + ["namespace2", "derive", "name3"], + ["namespace1", "basic", "name4"], + ["namespace2", "derive", "name5"], + ], + ) + def test_update_project_name(self, initiate_pepdb_con, namespace, name, new_name): + initiate_pepdb_con.project.update( + namespace=namespace, + name=name, + tag="default", + update_dict={"name": new_name}, + ) + assert initiate_pepdb_con.project.exists( + namespace=namespace, name=new_name, tag="default" + ) + + @pytest.mark.parametrize( + "namespace, name, new_tag", + [ + ["namespace1", "amendments1", "tag1"], + ["namespace1", "amendments2", "tag2"], + ["namespace2", "derive", "tag3"], + ["namespace1", "basic", "tag4"], + ["namespace2", "derive", "tag5"], + ], + ) + def test_update_project_tag(self, initiate_pepdb_con, namespace, name, new_tag): + initiate_pepdb_con.project.update( + namespace=namespace, name=name, tag="default", update_dict={"tag": new_tag} + ) + assert initiate_pepdb_con.project.exists( + namespace=namespace, name=name, tag=new_tag + ) + + @pytest.mark.parametrize( + "namespace, name, new_description", + [ + ["namespace1", "amendments1", "desc1 f"], + ["namespace1", "amendments2", "desc2 f"], + ["namespace2", "derive", "desc3 f"], + ["namespace1", "basic", "desc4 f"], + ["namespace2", "derive", "desc5 f"], + ], + ) + def test_update_project_description( + self, initiate_pepdb_con, namespace, name, new_description ): - pass - + prj = initiate_pepdb_con.project.get(namespace=namespace, name=name) + prj.description = new_description + initiate_pepdb_con.project.update( + namespace=namespace, name=name, tag="default", update_dict={"project": prj} + ) + + assert ( + initiate_pepdb_con.project.get(namespace=namespace, name=name).description + == new_description + ) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ["namespace1", "amendments2"], + ["namespace2", "derive"], + ["namespace2", "imply"], + ], + ) + def test_update_whole_config(self, initiate_pepdb_con, namespace, name): + new_prj = initiate_pepdb_con.project.get(namespace="namespace1", name="basic") + # update name. If name is different, it will update name too + new_prj.name = name + initiate_pepdb_con.project.update( + namespace=namespace, + name=name, + tag="default", + update_dict={"project": new_prj}, + ) + + assert initiate_pepdb_con.project.get(namespace=namespace, name=name) == new_prj + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ["namespace1", "amendments2"], + ["namespace2", "derive"], + ["namespace2", "imply"], + ], + ) + def test_overwrite_project(self, initiate_pepdb_con, namespace, name): + new_prj = initiate_pepdb_con.project.get(namespace="namespace1", name="basic") + + initiate_pepdb_con.project.create( + project=new_prj, + namespace=namespace, + name=name, + tag="default", + overwrite=True, + ) + + assert initiate_pepdb_con.project.get(namespace=namespace, name=name) == new_prj + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ["namespace1", "amendments2"], + ["namespace2", "derive"], + ["namespace2", "imply"], + ], + ) + def test_delete_project(self, initiate_pepdb_con, namespace, name): + initiate_pepdb_con.project.delete(namespace=namespace, name=name, tag="default") + + with pytest.raises( + NoResultFound, match="No row was found when one was required" + ): + kk = initiate_pepdb_con.project.get( + namespace=namespace, name=name, tag="default" + ) class TestAnnotation: """ Test function within annotation class """ - - @pytest.fixture(scope="function") - def initiate_con( - self, - mocker, - test_dsn, - ): - mocker.patch("psycopg2.connect") - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection._check_conn_db", - return_value=True, - ) - instance = PEPDatabaseAgent(dsn=test_dsn) - - yield instance - - def test_get_anno_by_providing_list(self, initiate_con, mocker): - get_single_annot_mock = mocker.patch( - "pepdbagent.pepdbagent.PEPDatabaseAnnotation._get_single_annotation", - ) - initiate_con.annotation.get_by_rp(["this/is:one", "This/if:two"]) - assert get_single_annot_mock.called - - def test_get_annotation_of_single_project(self, mocker, initiate_con): - run_sql_one_mock = mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchone", - return_value=[ - "1", - "2", - "3", - False, - 5, - 6, - datetime.datetime.now(), - datetime.datetime.now(), - "9", - "10", - ], - ) - initiate_con.annotation.get("test", "project", "pr") - assert run_sql_one_mock.called - - def test_get_annotation_of_single_project_by_rp(self, mocker, initiate_con): - run_sql_one_mock = mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchone", - return_value=[ - "1", - "2", - "3", - False, - 5, - 6, - datetime.datetime.now(), - datetime.datetime.now(), - "9", - "10", - ], - ) - initiate_con.annotation.get_by_rp("test/project:pr") - assert run_sql_one_mock.called - - def test_get_annotation_within_namespace(self, mocker, initiate_con): - run_sql_one_mock = mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=[ - ( - "1", - "2", - "3", - 6, - "5", - "dgs", - False, - datetime.datetime.now(), - datetime.datetime.now(), - ), - ( - "1", - "5", - "3", - 6, - "5", - "dgs", - False, - datetime.datetime.now(), - datetime.datetime.now(), - ), - ], - ) - count_prj_mock = mocker.patch( - "pepdbagent.pepdbagent.PEPDatabaseAnnotation._count_projects", - return_value=2, - ) - f = initiate_con.annotation.get(namespace="1") - assert f.count == 2 - assert len(f.results) == 2 - - def test_get_annotation_by_providing_query(self, mocker, initiate_con): - run_sql_one_mock = mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=[ - ( - "1", - "2", - "3", - 6, - "5", - "dgs", - False, - datetime.datetime.now(), - datetime.datetime.now(), - ), - ( - "1", - "5", - "3", - 6, - "5", - "dgs", - False, - datetime.datetime.now(), - datetime.datetime.now(), - ), - ], - ) - count_prj_mock = mocker.patch( - "pepdbagent.pepdbagent.PEPDatabaseAnnotation._count_projects", - return_value=2, - ) - f = initiate_con.annotation.get(query="1") - assert f.count == 2 - assert len(f.results) == 2 - - def test_registry_path_exception_pass(self, initiate_con): - initiate_con.annotation.get_by_rp(["this/is:one", "This/is/f:two"]) - - def test_registry_paths_exception(self, initiate_con): - with pytest.raises(Exception): - initiate_con.annotation.get_by_rp("This/is/wrong:registry") + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ["namespace1", "amendments2"], + ["namespace2", "derive"], + ["namespace2", "imply"], + ["namespace3", "subtable1"], + ], + ) + def test_annotation_of_one_project(self, initiate_pepdb_con, namespace, name): + result = initiate_pepdb_con.annotation.get(namespace=namespace, name=name, tag="default",) + assert result.results[0].namespace == namespace + + @pytest.mark.parametrize( + "namespace, n_projects", + [ + ["namespace1", 6], + ["namespace2", 8], + ["namespace3", 10], + ["private", 0], + ["private_test", 0], + ], + ) + def test_annotation_all(self, initiate_pepdb_con, namespace, n_projects): + result = initiate_pepdb_con.annotation.get(namespace=namespace,) + assert result.count == n_projects + assert len(result.results) == n_projects + + @pytest.mark.parametrize( + "namespace, n_projects", + [ + ["namespace1", 6], + ["namespace2", 8], + ["namespace3", 10], + ["private", 0], + ["private_test", 6], + ], + ) + @pytest.mark.parametrize("admin", ("private_test", ["private_test", "bbb"])) + def test_annotation_all_private(self, initiate_pepdb_con, namespace, n_projects, admin): + result = initiate_pepdb_con.annotation.get(namespace=namespace, admin=admin) + assert result.count == n_projects + assert len(result.results) == n_projects + + @pytest.mark.parametrize( + "namespace, limit, n_projects", + [ + ["namespace1", 3, 6], + ["namespace2", 2, 8], + ["namespace3", 8, 10], + ["private", 0, 0], + ["private_test", 5, 6], + ], + ) + @pytest.mark.parametrize("admin", ("private_test", ["private_test", "bbb"])) + def test_annotation_limit(self, initiate_pepdb_con, namespace, limit, admin, n_projects): + result = initiate_pepdb_con.annotation.get(namespace=namespace, admin=admin, limit=limit) + assert result.count == n_projects + assert len(result.results) == limit + + @pytest.mark.parametrize( + "namespace, order_by, first_name", + [ + ["namespace1", "name", "amendments1"], + ["namespace2", "name", "biocproject_exceptions"], + ["namespace3", "name", "node_alias"], + ["private_test", "name", "amendments1"], + ], + ) + @pytest.mark.parametrize("admin", ["private_test"]) + def test_order_by(self, initiate_pepdb_con, namespace, admin, order_by, first_name): + result = initiate_pepdb_con.annotation.get(namespace=namespace, admin=admin, order_by=order_by) + assert result.results[0].name == first_name + + @pytest.mark.parametrize( + "namespace, order_by, last_name", + [ + ["namespace1", "name", "biocproject"], + ["namespace2", "name", "imports"], + ["namespace3", "name", "subtables"], + ["private_test", "name", "subtable3"], + ], + ) + @pytest.mark.parametrize("admin", ["private_test"]) + def test_order_by_desc(self, initiate_pepdb_con, namespace, admin, order_by, last_name): + result = initiate_pepdb_con.annotation.get(namespace=namespace, admin=admin, order_by=order_by, order_desc=True) + assert result.results[0].name == last_name + + @pytest.mark.parametrize( + "namespace, query, found_number", + [ + ["namespace1", "ame", 2], + ["namespace2", "proj", 2], + ["namespace3", "ABLE", 6], + ["private_test", "a", 0], + [None, "re", 2] + ], + ) + def test_name_search(self, initiate_pepdb_con, namespace, query, found_number): + result = initiate_pepdb_con.annotation.get(namespace=namespace, query=query) + assert len(result.results) == found_number + + @pytest.mark.parametrize( + "namespace, query, found_number", + [ + ["namespace1", "ame", 2], + ["namespace2", "proj", 2], + ["namespace3", "ABLE", 6], + ["private_test", "b", 2], + [None, "re", 3] + ], + ) + def test_name_search_private(self, initiate_pepdb_con, namespace, query, found_number): + result = initiate_pepdb_con.annotation.get(namespace=namespace, query=query, admin="private_test") + assert len(result.results) == found_number class TestNamespace: @@ -382,41 +310,10 @@ class TestNamespace: Test function within namespace class """ - @pytest.fixture(scope="function") - def initiate_con( - self, - mocker, - test_dsn, - ): - mocker.patch("psycopg2.connect") - mocker.patch( - "pepdbagent.pepdbagent.BaseConnection._check_conn_db", - return_value=True, - ) - instance = PEPDatabaseAgent(dsn=test_dsn) - - yield instance + def test_annotation(self, initiate_pepdb_con): + result = initiate_pepdb_con.namespace.get() + assert len(result.results) == 3 - def test_get_namespace_by_providing_query(self, mocker, initiate_con): - run_sql_one_mock = mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=[("names", 2, 3)], - ) - count_prj_mock = mocker.patch( - "pepdbagent.pepdbagent.PEPDatabaseNamespace._count_namespace", - return_value=2, - ) - f = initiate_con.namespace.get(query="1") - assert len(f.results) == 1 - - def test_get_all_namespaces(self, mocker, initiate_con): - run_sql_one_mock = mocker.patch( - "pepdbagent.pepdbagent.BaseConnection.run_sql_fetchall", - return_value=[("names", 2, 3)], - ) - count_prj_mock = mocker.patch( - "pepdbagent.pepdbagent.PEPDatabaseNamespace._count_namespace", - return_value=2, - ) - f = initiate_con.namespace.get() - assert len(f.results) == 1 + def test_annotation_private(self, initiate_pepdb_con): + result = initiate_pepdb_con.namespace.get(admin="private_test") + assert len(result.results) == 4 From b6fb0427bb6eed7c34df52be5ca6c3f01dff2c4e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Jun 2023 12:12:04 -0400 Subject: [PATCH 22/32] added pep schema --- pepdbagent/db_utils.py | 2 +- pepdbagent/models.py | 3 + pepdbagent/modules/annotation.py | 4 ++ pepdbagent/modules/project.py | 16 +++++- tests/conftest.py | 1 + tests/test_pepagent.py | 99 +++++++++++++++++++++++++++----- 6 files changed, 110 insertions(+), 15 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index d45635e..ea18f3f 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -72,7 +72,7 @@ class Projects(Base): number_of_samples: Mapped[int] submission_date: Mapped[datetime.datetime] last_update_date: Mapped[datetime.datetime] - # schema: Mapped[Optional[str]] + pep_schema: Mapped[Optional[str]] __table_args__ = (PrimaryKeyConstraint("namespace", "name", "tag", name="id"),) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 2ac1013..2595109 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -20,6 +20,7 @@ class AnnotationModel(BaseModel): last_update_date: Optional[str] submission_date: Optional[str] digest: Optional[str] + pep_schema: Optional[str] class Config: allow_population_by_field_name = True @@ -74,6 +75,7 @@ class UpdateItems(BaseModel): tag: Optional[str] is_private: Optional[bool] name: Optional[str] + pep_schema: Optional[str] class Config: arbitrary_types_allowed = True @@ -94,6 +96,7 @@ class UpdateModel(BaseModel): digest: Optional[str] last_update_date: Optional[datetime.datetime] number_of_samples: Optional[int] + pep_schema: Optional[str] @validator("tag", "name") def value_must_not_be_empty(cls, v): diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index ff9e12a..e2f8c21 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -168,6 +168,7 @@ def _get_single_annotation( Projects.submission_date, Projects.last_update_date, Projects.digest, + Projects.pep_schema, ).where( and_( Projects.name == name, @@ -192,6 +193,7 @@ def _get_single_annotation( submission_date=str(query_result.submission_date), last_update_date=str(query_result.last_update_date), digest=query_result.digest, + pep_schema=query_result.pep_schema, ) _LOGGER.info( f"Annotation of the project '{namespace}/{name}:{tag}' has been found!" @@ -267,6 +269,7 @@ def _get_projects( Projects.submission_date, Projects.last_update_date, Projects.digest, + Projects.pep_schema, ).select_from(Projects) statement = self._add_condition( @@ -290,6 +293,7 @@ def _get_projects( submission_date=str(result.submission_date), last_update_date=str(result.last_update_date), digest=result.digest, + pep_schema=result.pep_schema, ) ) if not order_desc: diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index d705bdc..59507c9 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -168,7 +168,7 @@ def create( name: str = None, tag: str = DEFAULT_TAG, is_private: bool = False, - # schema: str = None, + pep_schema: str = None, overwrite: bool = False, update_only: bool = False, ) -> None: @@ -182,6 +182,7 @@ def create( :param name: name of the project (Default: name is taken from the project object) :param tag: tag (or version) of the project. :param is_private: boolean value if the project should be visible just for user that creates it. + :param pep_schema: assign PEP to a specific schema. [DefaultL: None] :param overwrite: if project exists overwrite the project, otherwise upload it. [Default: False - project won't be overwritten if it exists in db] :param update_only: if project exists overwrite it, otherwise do nothing. [Default: False] @@ -216,6 +217,7 @@ def create( project_digest=proj_digest, number_of_samples=number_of_samples, private=is_private, + pep_schema=pep_schema, ) return None else: @@ -238,6 +240,7 @@ def create( last_update_date=datetime.datetime.now( datetime.timezone.utc ), + pep_schema=pep_schema, ) ) @@ -253,6 +256,7 @@ def create( project_digest=proj_digest, number_of_samples=number_of_samples, private=is_private, + pep_schema=pep_schema, ) return None @@ -272,6 +276,7 @@ def _overwrite( project_digest: str, number_of_samples: int, private: bool = False, + pep_schema: str = None, ) -> None: """ Update existing project by providing all necessary information. @@ -283,6 +288,7 @@ def _overwrite( :param project_digest: project digest :param number_of_samples: number of samples in project :param private: boolean value if the project should be visible just for user that creates it. + :param pep_schema: assign PEP to a specific schema. [DefaultL: None] :return: None """ proj_name = proj_name.lower() @@ -301,6 +307,7 @@ def _overwrite( number_of_samples=number_of_samples, private=private, last_update_date=datetime.datetime.now(datetime.timezone.utc), + pep_schema=pep_schema, ) .where( and_( @@ -410,6 +417,13 @@ def __create_update_dict(update_values: UpdateItems) -> dict: update_final = UpdateModel( name=update_values.name, **update_final.dict(exclude_unset=True) ) + + if update_values.pep_schema is not None: + update_final = UpdateModel( + pep_schema=update_values.pep_schema, + **update_final.dict(exclude_unset=True), + ) + return update_final.dict(exclude_unset=True, exclude_none=True) def exists( diff --git a/tests/conftest.py b/tests/conftest.py index 625400c..e18c26a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -52,6 +52,7 @@ def initiate_pepdb_con( is_private=private, project=prj, overwrite=True, + pep_schema="random_schema_name", ) yield pepdb_con diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index c410e8b..e8f1146 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -129,7 +129,7 @@ def test_update_project_description( ["namespace2", "imply"], ], ) - def test_update_whole_config(self, initiate_pepdb_con, namespace, name): + def test_update_whole_project(self, initiate_pepdb_con, namespace, name): new_prj = initiate_pepdb_con.project.get(namespace="namespace1", name="basic") # update name. If name is different, it will update name too new_prj.name = name @@ -142,6 +142,26 @@ def test_update_whole_config(self, initiate_pepdb_con, namespace, name): assert initiate_pepdb_con.project.get(namespace=namespace, name=name) == new_prj + @pytest.mark.parametrize( + "namespace, name, pep_schema", + [ + ["namespace1", "amendments1", "schema1"], + ["namespace1", "amendments2", "schema2"], + ["namespace2", "derive", "schema3"], + ["namespace1", "basic", "schema4"], + ["namespace2", "derive", "schema5"], + ], + ) + def test_update_pep_schema(self, initiate_pepdb_con, namespace, name, pep_schema): + initiate_pepdb_con.project.update( + namespace=namespace, + name=name, + tag="default", + update_dict={"pep_schema": pep_schema}, + ) + res = initiate_pepdb_con.annotation.get(namespace, name, "default") + assert res.results[0].pep_schema == pep_schema + @pytest.mark.parametrize( "namespace, name", [ @@ -183,10 +203,12 @@ def test_delete_project(self, initiate_pepdb_con, namespace, name): namespace=namespace, name=name, tag="default" ) + class TestAnnotation: """ Test function within annotation class """ + @pytest.mark.parametrize( "namespace, name", [ @@ -198,7 +220,11 @@ class TestAnnotation: ], ) def test_annotation_of_one_project(self, initiate_pepdb_con, namespace, name): - result = initiate_pepdb_con.annotation.get(namespace=namespace, name=name, tag="default",) + result = initiate_pepdb_con.annotation.get( + namespace=namespace, + name=name, + tag="default", + ) assert result.results[0].namespace == namespace @pytest.mark.parametrize( @@ -212,7 +238,9 @@ def test_annotation_of_one_project(self, initiate_pepdb_con, namespace, name): ], ) def test_annotation_all(self, initiate_pepdb_con, namespace, n_projects): - result = initiate_pepdb_con.annotation.get(namespace=namespace,) + result = initiate_pepdb_con.annotation.get( + namespace=namespace, + ) assert result.count == n_projects assert len(result.results) == n_projects @@ -227,7 +255,9 @@ def test_annotation_all(self, initiate_pepdb_con, namespace, n_projects): ], ) @pytest.mark.parametrize("admin", ("private_test", ["private_test", "bbb"])) - def test_annotation_all_private(self, initiate_pepdb_con, namespace, n_projects, admin): + def test_annotation_all_private( + self, initiate_pepdb_con, namespace, n_projects, admin + ): result = initiate_pepdb_con.annotation.get(namespace=namespace, admin=admin) assert result.count == n_projects assert len(result.results) == n_projects @@ -243,8 +273,12 @@ def test_annotation_all_private(self, initiate_pepdb_con, namespace, n_projects, ], ) @pytest.mark.parametrize("admin", ("private_test", ["private_test", "bbb"])) - def test_annotation_limit(self, initiate_pepdb_con, namespace, limit, admin, n_projects): - result = initiate_pepdb_con.annotation.get(namespace=namespace, admin=admin, limit=limit) + def test_annotation_limit( + self, initiate_pepdb_con, namespace, limit, admin, n_projects + ): + result = initiate_pepdb_con.annotation.get( + namespace=namespace, admin=admin, limit=limit + ) assert result.count == n_projects assert len(result.results) == limit @@ -259,7 +293,9 @@ def test_annotation_limit(self, initiate_pepdb_con, namespace, limit, admin, n_p ) @pytest.mark.parametrize("admin", ["private_test"]) def test_order_by(self, initiate_pepdb_con, namespace, admin, order_by, first_name): - result = initiate_pepdb_con.annotation.get(namespace=namespace, admin=admin, order_by=order_by) + result = initiate_pepdb_con.annotation.get( + namespace=namespace, admin=admin, order_by=order_by + ) assert result.results[0].name == first_name @pytest.mark.parametrize( @@ -272,8 +308,12 @@ def test_order_by(self, initiate_pepdb_con, namespace, admin, order_by, first_na ], ) @pytest.mark.parametrize("admin", ["private_test"]) - def test_order_by_desc(self, initiate_pepdb_con, namespace, admin, order_by, last_name): - result = initiate_pepdb_con.annotation.get(namespace=namespace, admin=admin, order_by=order_by, order_desc=True) + def test_order_by_desc( + self, initiate_pepdb_con, namespace, admin, order_by, last_name + ): + result = initiate_pepdb_con.annotation.get( + namespace=namespace, admin=admin, order_by=order_by, order_desc=True + ) assert result.results[0].name == last_name @pytest.mark.parametrize( @@ -283,7 +323,7 @@ def test_order_by_desc(self, initiate_pepdb_con, namespace, admin, order_by, las ["namespace2", "proj", 2], ["namespace3", "ABLE", 6], ["private_test", "a", 0], - [None, "re", 2] + [None, "re", 2], ], ) def test_name_search(self, initiate_pepdb_con, namespace, query, found_number): @@ -297,13 +337,46 @@ def test_name_search(self, initiate_pepdb_con, namespace, query, found_number): ["namespace2", "proj", 2], ["namespace3", "ABLE", 6], ["private_test", "b", 2], - [None, "re", 3] + [None, "re", 3], ], ) - def test_name_search_private(self, initiate_pepdb_con, namespace, query, found_number): - result = initiate_pepdb_con.annotation.get(namespace=namespace, query=query, admin="private_test") + def test_name_search_private( + self, initiate_pepdb_con, namespace, query, found_number + ): + result = initiate_pepdb_con.annotation.get( + namespace=namespace, query=query, admin="private_test" + ) assert len(result.results) == found_number + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ["namespace1", "amendments2"], + ["namespace2", "derive"], + ["namespace2", "imply"], + ["namespace3", "subtable1"], + ], + ) + def test_all_annotations_are_returned(self, initiate_pepdb_con, namespace, name): + result = initiate_pepdb_con.annotation.get( + namespace=namespace, + name=name, + tag="default", + ) + assert result.results[0].__fields_set__ == { + "is_private", + "tag", + "namespace", + "digest", + "description", + "number_of_samples", + "name", + "last_update_date", + "submission_date", + "pep_schema", + } + class TestNamespace: """ From 25c98897da5d62d9af98a3beea5ef1480b0d27d8 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Jun 2023 12:44:24 -0400 Subject: [PATCH 23/32] requirements + testing --- .github/workflows/new_db.yml | 48 ------------------------------- .github/workflows/pytest.yml | 21 ++++++++++---- manual_tests.py | 2 +- requirements/requirements-all.txt | 2 +- setup.py | 3 +- tests/conftest.py | 2 +- 6 files changed, 21 insertions(+), 57 deletions(-) delete mode 100644 .github/workflows/new_db.yml diff --git a/.github/workflows/new_db.yml b/.github/workflows/new_db.yml deleted file mode 100644 index 3c53b5e..0000000 --- a/.github/workflows/new_db.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: Test bedstat pipeline - -on: - push: - branches: [master, dev, sqlalchemy_testing] - pull_request: - branches: [master, dev] - -jobs: - pytest: - strategy: - matrix: - python-version: [3.11] - os: [ubuntu-latest] # can't use macOS when using service containers or container jobs - r: [release] - runs-on: ${{ matrix.os }} - services: - postgres: - image: postgres - env: - POSTGRES_USER: postgres - POSTGRES_PASSWORD: docker - POSTGRES_DB: pep-db - POSTGRES_HOST: localhost - ports: - - 5432:5432 - options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 - steps: - - uses: actions/checkout@v2 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dev dependancies - run: if [ -f requirements/requirements-dev.txt ]; then pip install -r requirements/requirements-dev.txt; fi - - - name: Install test dependancies - run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi - - - name: Install package - run: python -m pip install . - - - name: Run pytest tests - run: pytest tests -x -vv - - diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 131693e..c0f3b03 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -2,18 +2,29 @@ name: Run pytests on: push: - branches: [dev] + branches: [master, dev] pull_request: branches: [master, dev] jobs: pytest: - runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] - os: [ubuntu-20.04] - + python-version: [3.8, 3.11] + os: [ubuntu-latest] # can't use macOS when using service containers or container jobs + r: [release] + runs-on: ${{ matrix.os }} + services: + postgres: + image: postgres + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: docker + POSTGRES_DB: pep-db + POSTGRES_HOST: localhost + ports: + - 5432:5432 + options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 steps: - uses: actions/checkout@v2 diff --git a/manual_tests.py b/manual_tests.py index 8429a54..185cab1 100644 --- a/manual_tests.py +++ b/manual_tests.py @@ -9,7 +9,7 @@ ############### # # Upload prj = peppy.Project( - "/home/bnt4me/virginia/repos/pepdbagent/sample_pep/basic/project_config.yaml" + "/home/bnt4me/virginia/repos/pepdbagent/tests/data/namespace1/basic/project_config.yaml" ) con.project.create(project=prj, namespace="Khoroshevskyi", name="dupa", tag="test1", overwrite=True) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 3f46706..fe817d2 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,6 +1,6 @@ sqlalchemy>=2.0.0 logmuse -peppy>=0.35.4 +peppy>=0.35.5 ubiquerg>=0.6.2 coloredlogs>=15.0.1 pytest-mock diff --git a/setup.py b/setup.py index 2a124b7..81cbf74 100644 --- a/setup.py +++ b/setup.py @@ -46,11 +46,12 @@ def get_static(name, condition=None): long_description=long_description, long_description_content_type="text/markdown", classifiers=[ - "Development Status :: 1 - Planning", + "Development Status :: 3 - Alpha", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Bio-Informatics", ], keywords="project, metadata, bioinformatics, database", diff --git a/tests/conftest.py b/tests/conftest.py index e18c26a..8c9c3dc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -37,7 +37,7 @@ def initiate_pepdb_con( sa_engine = create_engine(DNS) with sa_engine.begin() as conn: conn.execute(text("DROP table IF EXISTS projects")) - pepdb_con = PEPDatabaseAgent(dsn=DNS, echo=True) + pepdb_con = PEPDatabaseAgent(dsn=DNS, echo=False) for namespace, item in list_of_available_peps.items(): if namespace == "private_test": private = True From 9ddd156c134585ec79390cd3b1f7cdc2f589befa Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Jun 2023 13:44:04 -0400 Subject: [PATCH 24/32] new docstring, readme, version --- docs/README.md | 9 ++++++-- docs/changelog.md | 11 +++++++++- docs/db_tutorial.md | 6 +++++- manual_tests.py | 49 +++++++++++++++++++++++++++++++++++++++++- pep_db/pep_db.sql | 1 + pepdbagent/_version.py | 2 +- 6 files changed, 72 insertions(+), 6 deletions(-) diff --git a/docs/README.md b/docs/README.md index 2aacfb7..ccde8bc 100644 --- a/docs/README.md +++ b/docs/README.md @@ -101,7 +101,8 @@ AnnotationRetrunModel(count=1, description=None, last_update_date='2022-11-09', submission_date='2023-01-09', - digest='36bb973f2eca3706ed9852abddd') + digest='36bb973f2eca3706ed9852abddd', + pep_schema="bedmake")]) ``` @@ -128,4 +129,8 @@ NamespaceReturnModel(count=1, limit=100, offset=0, results=[NamespaceResultModel(namespace='databio', number_of_projects=6, number_of_samples=470)]) -``` \ No newline at end of file +``` + + +# Example PEPs +To populate database with example peps use function written in manual tests: [Manual test](../manual_tests.py) \ No newline at end of file diff --git a/docs/changelog.md b/docs/changelog.md index 84c88d4..61ffcf6 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,13 +2,22 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.4.0] -- 2023-06-01 +- Transitioned to SQLAlchemy ORM. +- Added a pep_schema column to the database. +- Implemented a new testing approach. +- Integrated sorting functionality into the annotations module. +- Temporarily disabled description-based searches to mitigate long processing times and reduce database load. +- Included timezone support in the database. +- Standardized namespace and names to lowercase for case-insensitivity. +- Streamlined the database schema creation process, with tables now created dynamically as needed. + ## [0.3.1] -- 2023-03-23 - Fixed bug with peppy const dependencies ## [0.3.0] -- 2023-01-19 - - Restructured pepdbagent: - Renamed `Agent` class to `PEPDatabaseAgent` - created subclasses (project, annotation, namespace). diff --git a/docs/db_tutorial.md b/docs/db_tutorial.md index 8fd00a2..a80defa 100644 --- a/docs/db_tutorial.md +++ b/docs/db_tutorial.md @@ -14,4 +14,8 @@ Now db is installed `docker exec -it 65f bash` -`psql -U postgres -d pep-db` \ No newline at end of file +`psql -U postgres -d pep-db` + +--- +If you have your own database, you can initialize a connection using pepdbagent. +The pepdbagent will create a new database schema if it doesn't already exist, or throw an exception if the schema is incorrect. \ No newline at end of file diff --git a/manual_tests.py b/manual_tests.py index 185cab1..5113284 100644 --- a/manual_tests.py +++ b/manual_tests.py @@ -1,13 +1,60 @@ # file for manual local tests import peppy - +import os import pepdbagent from peppy import Project +DATA_PATH = os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "tests", + "data", +) + + +def get_path_to_example_file(namespace, project_name): + return os.path.join(DATA_PATH, namespace, project_name, "project_config.yaml") + + +def list_of_available_peps(): + pep_namespaces = os.listdir(DATA_PATH) + projects = {} + for np in pep_namespaces: + pep_name = os.listdir(os.path.join(DATA_PATH, np)) + projects[np] = {p: get_path_to_example_file(np, p) for p in pep_name} + return projects + + +def upload_sample_pep_to_db(connection: pepdbagent.PEPDatabaseAgent): + list_of_peps = list_of_available_peps() + for namespace, item in list_of_peps .items(): + if namespace == "private_test": + private = True + else: + private = False + for name, path in item.items(): + prj = peppy.Project(path) + connection.project.create( + namespace=namespace, + name=name, + tag="default", + is_private=private, + project=prj, + overwrite=True, + pep_schema="random_schema_name", + ) + + return None + + +# populate database with few peps: con = pepdbagent.PEPDatabaseAgent(dsn="postgresql://postgres:docker@localhost:5432/pep-db", echo=True) +upload_sample_pep_to_db(con) + + ############### # # Upload + prj = peppy.Project( "/home/bnt4me/virginia/repos/pepdbagent/tests/data/namespace1/basic/project_config.yaml" ) diff --git a/pep_db/pep_db.sql b/pep_db/pep_db.sql index 799533b..2693ce3 100644 --- a/pep_db/pep_db.sql +++ b/pep_db/pep_db.sql @@ -16,6 +16,7 @@ CREATE TABLE projects ( number_of_samples int NOT NULL, submission_date timestamp NOT NULL, last_update_date timestamp NOT NULL, + pep_schema TEXT, CONSTRAINT id PRIMARY KEY (namespace, name, tag) ); diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index 260c070..6a9beea 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 +1 @@ -__version__ = "0.3.1" +__version__ = "0.4.0" From 95daf3b87ee1731fa30a8733cf7dd2d8e0f28a75 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 7 Jun 2023 15:00:38 -0400 Subject: [PATCH 25/32] minor readme updates --- README.md | 13 ++++++------- docs/README.md | 6 +++++- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e310871..9d7832d 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,15 @@ # pepdbagent -pepdbagent is a python library and toolkit that gives a user user-friendly +`pepdbagent` is a Python library and toolkit that gives a user-friendly interface to connect, upload, update and retrieve information from pep-db. -**pep-db** is and postgres database created for storing [PEPs](http://pep.databio.org/en/latest/). -It is a backend database for PEPhub. database enables storing huge projects and provides fast speed of retrieving them. +**pep-db** is a postgres database created for storing [PEPs](http://pep.databio.org/en/latest/). +It is a backend database for PEPhub. -Before using pepdbagent, you should install or have access to pep-db. +Before using pepdbagent, you should install or have access to a pep-db instance. -To install pep-db you can use this tutorial: +To run a pep-db instance, you can use this tutorial: - [pep-db installation](./docs/db_tutorial.md) - -pepdbagent tutorial is here: +Then, follow the `pepdbagent` tutorial here: - [pedbagent](./docs/README.md) \ No newline at end of file diff --git a/docs/README.md b/docs/README.md index 2aacfb7..d9965b3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -10,6 +10,7 @@ to increase readability, maintainability, and user experience of pepdbagent, whi PEPDatabaseAgent is the primary class that you will use. It connects to the database (using **BaseConnection** class). Example: Instiantiate a PEPDatabaseAgent object and connect to database: + ```python import pepdbagent @@ -30,9 +31,12 @@ Example: ```python import peppy -prj_obj = peppy.Project("/path/to/project_config.yaml") +prj_obj = peppy.Project("sample_pep/basic/project_config.yaml") # create a project +namespace = "demo" +name = "basic_project" +tag = None agent.project.create(prj_obj, namespace, name, tag) # updating record in database (project) From f0e5040a44a29b8b6417e4a69685e4a87c4d8bd3 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Jun 2023 15:28:02 -0400 Subject: [PATCH 26/32] typo fix --- manual_tests.py | 9 +++------ pepdbagent/modules/annotation.py | 2 +- pepdbagent/modules/project.py | 7 +++++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/manual_tests.py b/manual_tests.py index 5113284..fd18b3f 100644 --- a/manual_tests.py +++ b/manual_tests.py @@ -61,16 +61,13 @@ def upload_sample_pep_to_db(connection: pepdbagent.PEPDatabaseAgent): con.project.create(project=prj, namespace="Khoroshevskyi", name="dupa", tag="test1", overwrite=True) con.project.exists(namespace="Khoroshevskyi", name="dupa", tag="test1") -con.project.update(update_dict={"is_private": False}, namespace="Khoroshevskyi", name="dupa", tag="test1") +# con.project.update(update_dict={"is_private": False}, namespace="Khoroshevskyi", name="dupa", tag="test1") # # Project # prj_dow = con.project.get(namespace="Khoroshevskyi", name="dupa", tag="test1") -exit(1) -print(prj_dow.name) - -prj_raw = con.project.get(namespace="Khoroshfevskyi", name="dupa", tag="test1", raw=True) +prj_raw = con.project.get(namespace="Khoroshevskyi", name="dupa", tag="test1", raw=True) print(prj_raw) @@ -78,7 +75,7 @@ def upload_sample_pep_to_db(connection: pepdbagent.PEPDatabaseAgent): ############### # Annotation -dd_list = con.adialectnnotation.get_by_rp( +dd_list = con.annotation.get_by_rp( [ "Khoroshevskyi/gse_yaml:default", "Khoroshevskyi/gse_yaml:default", diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index e2f8c21..ce3c924 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -182,7 +182,7 @@ def _get_single_annotation( ) query_result = self._pep_db_engine.session_execute(statement).first() - if len(query_result) > 0: + if query_result: annot = AnnotationModel( namespace=query_result.namespace, name=query_result.name, diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 59507c9..cbd5270 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -5,7 +5,7 @@ import peppy from sqlalchemy import Engine, and_, delete, insert, or_, select, update -from sqlalchemy.exc import IntegrityError +from sqlalchemy.exc import IntegrityError, NoResultFound from pepdbagent.const import * from pepdbagent.db_utils import Projects, BaseEngine @@ -71,7 +71,10 @@ def get( ) ) - found_prj = self._pep_db_engine.session_execute(statement).one() + try: + found_prj = self._pep_db_engine.session_execute(statement).one() + except NoResultFound: + raise ProjectNotFoundError if found_prj: _LOGGER.info( From 985b1184f84c300e95841a20a1e685b8639f2532 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Jun 2023 15:35:19 -0400 Subject: [PATCH 27/32] fixed tests --- .github/workflows/pytest.yml | 2 +- tests/test_pepagent.py | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index c0f3b03..9a1cb16 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -10,7 +10,7 @@ jobs: pytest: strategy: matrix: - python-version: [3.8, 3.11] + python-version: ["3.8", "3.11"] os: [ubuntu-latest] # can't use macOS when using service containers or container jobs r: [release] runs-on: ${{ matrix.os }} diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index e8f1146..2d7eda0 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -3,7 +3,6 @@ import os from pepdbagent.exceptions import ProjectNotFoundError -from sqlalchemy.exc import NoResultFound DNS = f"postgresql://postgres:docker@localhost:5432/pep-db" @@ -54,7 +53,7 @@ def test_get_project(self, initiate_pepdb_con, namespace, name): ], ) def test_get_project_error(self, initiate_pepdb_con, namespace, name, tag): - with pytest.raises(NoResultFound, match="No row was found"): + with pytest.raises(ProjectNotFoundError, match="Project does not exist."): kk = initiate_pepdb_con.project.get(namespace=namespace, name=name, tag=tag) @pytest.mark.parametrize( @@ -196,9 +195,7 @@ def test_overwrite_project(self, initiate_pepdb_con, namespace, name): def test_delete_project(self, initiate_pepdb_con, namespace, name): initiate_pepdb_con.project.delete(namespace=namespace, name=name, tag="default") - with pytest.raises( - NoResultFound, match="No row was found when one was required" - ): + with pytest.raises(ProjectNotFoundError, match="Project does not exist."): kk = initiate_pepdb_con.project.get( namespace=namespace, name=name, tag="default" ) From 2edbf11409316d83dcbfb214dfca2564e469984e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Jun 2023 16:20:51 -0400 Subject: [PATCH 28/32] fixed NL comments --- pepdbagent/const.py | 27 +-------------------------- pepdbagent/db_utils.py | 10 +++++----- pepdbagent/modules/annotation.py | 5 ++--- pepdbagent/modules/namespace.py | 4 ++-- pepdbagent/modules/project.py | 2 +- 5 files changed, 11 insertions(+), 37 deletions(-) diff --git a/pepdbagent/const.py b/pepdbagent/const.py index 01cf9cf..09acb57 100644 --- a/pepdbagent/const.py +++ b/pepdbagent/const.py @@ -1,29 +1,4 @@ -DB_TABLE_NAME = "projects" -ID_COL = "id" -PROJ_COL = "project_value" -ANNO_COL = "anno_info" -NAMESPACE_COL = "namespace" -NAME_COL = "name" -TAG_COL = "tag" -DIGEST_COL = "digest" -PRIVATE_COL = "private" -N_SAMPLES_COL = "number_of_samples" -SUBMISSION_DATE_COL = "submission_date" -LAST_UPDATE_DATE_COL = "last_update_date" - - -DB_COLUMNS = [ - ID_COL, - DIGEST_COL, - PROJ_COL, - NAMESPACE_COL, - NAME_COL, - TAG_COL, - PRIVATE_COL, - N_SAMPLES_COL, - SUBMISSION_DATE_COL, - LAST_UPDATE_DATE_COL, -] +PKG_NAME = "pepdbagent" DEFAULT_NAMESPACE = "_" DEFAULT_TAG = "default" diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index ea18f3f..e127ffb 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -24,22 +24,22 @@ mapped_column, ) -from pepdbagent.const import POSTGRES_DIALECT +from pepdbagent.const import POSTGRES_DIALECT, PKG_NAME from pepdbagent.exceptions import SchemaError -_LOGGER = logging.getLogger("pepdbagent") +_LOGGER = logging.getLogger(PKG_NAME) class BIGSERIAL(BigInteger): pass -@compiles(BIGSERIAL, "postgresql") +@compiles(BIGSERIAL, POSTGRES_DIALECT) def compile_bigserial_pg(type_, compiler, **kw): return "BIGSERIAL" -@compiles(JSONB, "postgresql") +@compiles(JSONB, POSTGRES_DIALECT) def compile_jsonb_pg(type_, compiler, **kw): return "JSONB" @@ -160,7 +160,7 @@ def engine(self): def _start_session(self): session = Session(self.engine) try: - session.execute(select(Projects)).first() + session.execute(select(Projects).limit(1)) except ProgrammingError: raise SchemaError() diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index ce3c924..afd5841 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -6,14 +6,13 @@ from sqlalchemy.exc import IntegrityError from sqlalchemy.sql.selectable import Select -from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET, DEFAULT_TAG +from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET, DEFAULT_TAG, PKG_NAME from pepdbagent.db_utils import Projects, BaseEngine from pepdbagent.exceptions import ProjectNotFoundError, RegistryPathError from pepdbagent.models import AnnotationList, AnnotationModel from pepdbagent.utils import registry_path_converter, tuple_converter - -_LOGGER = logging.getLogger("pepdbagent") +_LOGGER = logging.getLogger(PKG_NAME) class PEPDatabaseAnnotation: diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index a6070e2..a33a94d 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -4,12 +4,12 @@ from sqlalchemy import distinct, func, or_, select from sqlalchemy.sql.selectable import Select -from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET +from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET, PKG_NAME from pepdbagent.db_utils import Projects, BaseEngine from pepdbagent.models import Namespace, NamespaceList from pepdbagent.utils import tuple_converter -_LOGGER = logging.getLogger("pepdbagent") +_LOGGER = logging.getLogger(PKG_NAME) class PEPDatabaseNamespace: diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index cbd5270..c295976 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -14,7 +14,7 @@ from pepdbagent.utils import create_digest, registry_path_converter -_LOGGER = logging.getLogger("pepdbagent") +_LOGGER = logging.getLogger(PKG_NAME) class PEPDatabaseProject: From 725030632ed388e70e9ccda49cc9b6cedf7e5ca3 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 7 Jun 2023 17:12:13 -0400 Subject: [PATCH 29/32] black lines to 99 --- pepdbagent/exceptions.py | 4 +-- pepdbagent/modules/annotation.py | 34 +++++++++------------ pepdbagent/modules/namespace.py | 9 +++--- pepdbagent/modules/project.py | 35 +++++++--------------- pyproject.toml | 4 +++ setup.py | 8 ++--- tests/test_pepagent.py | 51 +++++++++++++------------------- 7 files changed, 57 insertions(+), 88 deletions(-) create mode 100644 pyproject.toml diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py index afa7f2c..f63b98f 100644 --- a/pepdbagent/exceptions.py +++ b/pepdbagent/exceptions.py @@ -10,9 +10,7 @@ def __init__(self, msg): class SchemaError(PEPDatabaseAgentError): def __init__(self): - super().__init__( - """PEP_db connection error! The schema of connected db is incorrect""" - ) + super().__init__("""PEP_db connection error! The schema of connected db is incorrect""") class RegistryPathError(PEPDatabaseAgentError): diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index ce3c924..b8d265a 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -83,9 +83,7 @@ def get( return AnnotationList( limit=limit, offset=offset, - count=self._count_projects( - namespace=namespace, search_str=query, admin=admin - ), + count=self._count_projects(namespace=namespace, search_str=query, admin=admin), results=self._get_projects( namespace=namespace, search_str=query, @@ -121,9 +119,7 @@ def get_by_rp( _LOGGER.error(str(err), registry_paths) continue try: - single_return = self._get_single_annotation( - namespace, name, tag, admin - ) + single_return = self._get_single_annotation(namespace, name, tag, admin) if single_return: anno_results.append(single_return) except ProjectNotFoundError: @@ -195,14 +191,10 @@ def _get_single_annotation( digest=query_result.digest, pep_schema=query_result.pep_schema, ) - _LOGGER.info( - f"Annotation of the project '{namespace}/{name}:{tag}' has been found!" - ) + _LOGGER.info(f"Annotation of the project '{namespace}/{name}:{tag}' has been found!") return annot else: - raise ProjectNotFoundError( - f"Project '{namespace}/{name}:{tag}' was not found." - ) + raise ProjectNotFoundError(f"Project '{namespace}/{name}:{tag}' was not found.") def _count_projects( self, @@ -221,7 +213,10 @@ def _count_projects( admin = [] statement = select(func.count()).select_from(Projects) statement = self._add_condition( - statement, namespace=namespace, search_str=search_str, admin_list=admin + statement, + namespace=namespace, + search_str=search_str, + admin_list=admin, ) result = self._pep_db_engine.session_execute(statement).first() @@ -253,9 +248,7 @@ def _get_projects( :param order_desc: Sort the records in descending order. [Default: False] :return: list of found projects with their annotations. """ - _LOGGER.info( - f"Running annotation search: (namespace: {namespace}, query: {search_str}." - ) + _LOGGER.info(f"Running annotation search: (namespace: {namespace}, query: {search_str}.") if admin is None: admin = [] @@ -273,7 +266,10 @@ def _get_projects( ).select_from(Projects) statement = self._add_condition( - statement, namespace=namespace, search_str=search_str, admin_list=admin + statement, + namespace=namespace, + search_str=search_str, + admin_list=admin, ) statement = self._add_order_by_keyword(statement, by=order_by, desc=order_desc) statement = statement.limit(limit).offset(offset) @@ -383,9 +379,7 @@ def get_project_number_in_namespace( if admin is None: admin = [] statement = ( - select(func.count()) - .select_from(Projects) - .where(Projects.namespace == namespace) + select(func.count()).select_from(Projects).where(Projects.namespace == namespace) ) statement = statement.where( or_(Projects.private.is_(False), Projects.namespace.in_(admin)) diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index a6070e2..deebba6 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -47,16 +47,17 @@ def get( search results } """ - _LOGGER.info( - f"Getting namespaces annotation with provided info: (query: {query})" - ) + _LOGGER.info(f"Getting namespaces annotation with provided info: (query: {query})") admin_tuple = tuple_converter(admin) return NamespaceList( count=self._count_namespace(search_str=query, admin_nsp=admin_tuple), limit=limit, offset=offset, results=self._get_namespace( - search_str=query, admin_nsp=admin_tuple, limit=limit, offset=offset + search_str=query, + admin_nsp=admin_tuple, + limit=limit, + offset=offset, ), ) diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index cbd5270..4aceee6 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -77,9 +77,7 @@ def get( raise ProjectNotFoundError if found_prj: - _LOGGER.info( - f"Project has been found: {found_prj.namespace}, {found_prj.name}" - ) + _LOGGER.info(f"Project has been found: {found_prj.namespace}, {found_prj.name}") project_value = found_prj.project_value is_private = found_prj.private if raw: @@ -201,17 +199,13 @@ def create( elif proj_dict["name"]: proj_name = proj_dict["name"].lower() else: - raise ValueError( - f"Name of the project wasn't provided. Project will not be uploaded." - ) + raise ValueError(f"Name of the project wasn't provided. Project will not be uploaded.") proj_digest = create_digest(proj_dict) number_of_samples = len(project.samples) if update_only: - _LOGGER.info( - f"Update_only argument is set True. Updating project {proj_name} ..." - ) + _LOGGER.info(f"Update_only argument is set True. Updating project {proj_name} ...") self._overwrite( project_dict=proj_dict, namespace=namespace, @@ -237,12 +231,8 @@ def create( project_value=proj_dict, number_of_samples=number_of_samples, private=is_private, - submission_date=datetime.datetime.now( - datetime.timezone.utc - ), - last_update_date=datetime.datetime.now( - datetime.timezone.utc - ), + submission_date=datetime.datetime.now(datetime.timezone.utc), + last_update_date=datetime.datetime.now(datetime.timezone.utc), pep_schema=pep_schema, ) ) @@ -321,15 +311,11 @@ def _overwrite( ) ) - _LOGGER.info( - f"Project '{namespace}/{proj_name}:{tag}' has been successfully updated!" - ) + _LOGGER.info(f"Project '{namespace}/{proj_name}:{tag}' has been successfully updated!") return None else: - raise ProjectNotFoundError( - "Project does not exist! No project will be updated!" - ) + raise ProjectNotFoundError("Project does not exist! No project will be updated!") def update( self, @@ -398,9 +384,7 @@ def __create_update_dict(update_values: UpdateItems) -> dict: update_final = UpdateModel( project_value=update_values.project_value.to_dict(extended=True), name=update_values.project_value.name, - digest=create_digest( - update_values.project_value.to_dict(extended=True) - ), + digest=create_digest(update_values.project_value.to_dict(extended=True)), last_update_date=datetime.datetime.now(datetime.timezone.utc), number_of_samples=len(update_values.project_value.samples), ) @@ -418,7 +402,8 @@ def __create_update_dict(update_values: UpdateItems) -> dict: if update_values.name is not None: update_final = UpdateModel( - name=update_values.name, **update_final.dict(exclude_unset=True) + name=update_values.name, + **update_final.dict(exclude_unset=True), ) if update_values.pep_schema is not None: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9348158 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,4 @@ +[tool.black] +line-length = 99 +target-version = ['py38', 'py311'] +include = '\.pyi?$' diff --git a/setup.py b/setup.py index 81cbf74..0cb183d 100644 --- a/setup.py +++ b/setup.py @@ -22,9 +22,7 @@ def get_static(name, condition=None): static = [ os.path.join(name, f) - for f in os.listdir( - os.path.join(os.path.dirname(os.path.realpath(__file__)), name) - ) + for f in os.listdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), name)) ] if condition is None: return static @@ -60,8 +58,6 @@ def get_static(name, condition=None): license="BSD2", include_package_data=True, # tests_require=(["pytest"]), - setup_requires=( - ["pytest-runner"] if {"test", "pytest", "ptr"} & set(sys.argv) else [] - ), + setup_requires=(["pytest-runner"] if {"test", "pytest", "ptr"} & set(sys.argv) else []), **extra, ) diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 2d7eda0..d46a882 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -36,9 +36,7 @@ class TestProject: ], ) def test_get_project(self, initiate_pepdb_con, namespace, name): - kk = initiate_pepdb_con.project.get( - namespace=namespace, name=name, tag="default" - ) + kk = initiate_pepdb_con.project.get(namespace=namespace, name=name, tag="default") ff = peppy.Project(get_path_to_example_file(namespace, name)) assert kk == ff @@ -73,9 +71,7 @@ def test_update_project_name(self, initiate_pepdb_con, namespace, name, new_name tag="default", update_dict={"name": new_name}, ) - assert initiate_pepdb_con.project.exists( - namespace=namespace, name=new_name, tag="default" - ) + assert initiate_pepdb_con.project.exists(namespace=namespace, name=new_name, tag="default") @pytest.mark.parametrize( "namespace, name, new_tag", @@ -89,11 +85,12 @@ def test_update_project_name(self, initiate_pepdb_con, namespace, name, new_name ) def test_update_project_tag(self, initiate_pepdb_con, namespace, name, new_tag): initiate_pepdb_con.project.update( - namespace=namespace, name=name, tag="default", update_dict={"tag": new_tag} - ) - assert initiate_pepdb_con.project.exists( - namespace=namespace, name=name, tag=new_tag + namespace=namespace, + name=name, + tag="default", + update_dict={"tag": new_tag}, ) + assert initiate_pepdb_con.project.exists(namespace=namespace, name=name, tag=new_tag) @pytest.mark.parametrize( "namespace, name, new_description", @@ -111,7 +108,10 @@ def test_update_project_description( prj = initiate_pepdb_con.project.get(namespace=namespace, name=name) prj.description = new_description initiate_pepdb_con.project.update( - namespace=namespace, name=name, tag="default", update_dict={"project": prj} + namespace=namespace, + name=name, + tag="default", + update_dict={"project": prj}, ) assert ( @@ -196,9 +196,7 @@ def test_delete_project(self, initiate_pepdb_con, namespace, name): initiate_pepdb_con.project.delete(namespace=namespace, name=name, tag="default") with pytest.raises(ProjectNotFoundError, match="Project does not exist."): - kk = initiate_pepdb_con.project.get( - namespace=namespace, name=name, tag="default" - ) + kk = initiate_pepdb_con.project.get(namespace=namespace, name=name, tag="default") class TestAnnotation: @@ -252,9 +250,7 @@ def test_annotation_all(self, initiate_pepdb_con, namespace, n_projects): ], ) @pytest.mark.parametrize("admin", ("private_test", ["private_test", "bbb"])) - def test_annotation_all_private( - self, initiate_pepdb_con, namespace, n_projects, admin - ): + def test_annotation_all_private(self, initiate_pepdb_con, namespace, n_projects, admin): result = initiate_pepdb_con.annotation.get(namespace=namespace, admin=admin) assert result.count == n_projects assert len(result.results) == n_projects @@ -270,12 +266,8 @@ def test_annotation_all_private( ], ) @pytest.mark.parametrize("admin", ("private_test", ["private_test", "bbb"])) - def test_annotation_limit( - self, initiate_pepdb_con, namespace, limit, admin, n_projects - ): - result = initiate_pepdb_con.annotation.get( - namespace=namespace, admin=admin, limit=limit - ) + def test_annotation_limit(self, initiate_pepdb_con, namespace, limit, admin, n_projects): + result = initiate_pepdb_con.annotation.get(namespace=namespace, admin=admin, limit=limit) assert result.count == n_projects assert len(result.results) == limit @@ -305,11 +297,12 @@ def test_order_by(self, initiate_pepdb_con, namespace, admin, order_by, first_na ], ) @pytest.mark.parametrize("admin", ["private_test"]) - def test_order_by_desc( - self, initiate_pepdb_con, namespace, admin, order_by, last_name - ): + def test_order_by_desc(self, initiate_pepdb_con, namespace, admin, order_by, last_name): result = initiate_pepdb_con.annotation.get( - namespace=namespace, admin=admin, order_by=order_by, order_desc=True + namespace=namespace, + admin=admin, + order_by=order_by, + order_desc=True, ) assert result.results[0].name == last_name @@ -337,9 +330,7 @@ def test_name_search(self, initiate_pepdb_con, namespace, query, found_number): [None, "re", 3], ], ) - def test_name_search_private( - self, initiate_pepdb_con, namespace, query, found_number - ): + def test_name_search_private(self, initiate_pepdb_con, namespace, query, found_number): result = initiate_pepdb_con.annotation.get( namespace=namespace, query=query, admin="private_test" ) From fa0379370b48001e3e1b2bcaa993ace959c83a76 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 8 Jun 2023 15:02:27 -0400 Subject: [PATCH 30/32] fixed manual tests --- manual_tests.py | 47 +++++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/manual_tests.py b/manual_tests.py index fd18b3f..32a5a61 100644 --- a/manual_tests.py +++ b/manual_tests.py @@ -27,7 +27,7 @@ def list_of_available_peps(): def upload_sample_pep_to_db(connection: pepdbagent.PEPDatabaseAgent): list_of_peps = list_of_available_peps() - for namespace, item in list_of_peps .items(): + for namespace, item in list_of_peps.items(): if namespace == "private_test": private = True else: @@ -48,7 +48,7 @@ def upload_sample_pep_to_db(connection: pepdbagent.PEPDatabaseAgent): # populate database with few peps: -con = pepdbagent.PEPDatabaseAgent(dsn="postgresql://postgres:docker@localhost:5432/pep-db", echo=True) +con = pepdbagent.PEPDatabaseAgent(dsn="postgresql://postgres:docker@localhost:5432/pep-db", echo=False) upload_sample_pep_to_db(con) @@ -58,16 +58,12 @@ def upload_sample_pep_to_db(connection: pepdbagent.PEPDatabaseAgent): prj = peppy.Project( "/home/bnt4me/virginia/repos/pepdbagent/tests/data/namespace1/basic/project_config.yaml" ) -con.project.create(project=prj, namespace="Khoroshevskyi", name="dupa", tag="test1", overwrite=True) +con.project.create(project=prj, namespace="dog_namespace", name="testttt", tag="test1", overwrite=True) -con.project.exists(namespace="Khoroshevskyi", name="dupa", tag="test1") -# con.project.update(update_dict={"is_private": False}, namespace="Khoroshevskyi", name="dupa", tag="test1") -# # Project +con.project.exists(namespace="dog_namespace", name="testttt", tag="test1") -# prj_dow = con.project.get(namespace="Khoroshevskyi", name="dupa", tag="test1") - -prj_raw = con.project.get(namespace="Khoroshevskyi", name="dupa", tag="test1", raw=True) +prj_raw = con.project.get(namespace="dog_namespace", name="testttt", tag="test1", raw=True) print(prj_raw) @@ -77,27 +73,27 @@ def upload_sample_pep_to_db(connection: pepdbagent.PEPDatabaseAgent): dd_list = con.annotation.get_by_rp( [ - "Khoroshevskyi/gse_yaml:default", - "Khoroshevskyi/gse_yaml:default", - "Khoroshevskyi/dupa:f1", + "dog_namespace/gse_yaml:default", + "dog_namespace/gse_yaml:default", + "dog_namespace/testttt:f1", ], - admin="Khoroshevskyi", + admin="dog_namespace", ) dd_list_private = con.annotation.get_by_rp( [ - "Khoroshevskyi/gse_yaml:default", - "Khoroshevskyi/gse_yaml:default", - "Khoroshevskyi/dupa:f1", + "dog_namespace/gse_yaml:default", + "dog_namespace/gse_yaml:default", + "dog_namespace/testttt:f1", ] ) -dd_search = con.annotation.get(namespace="Khoroshevskyi") -dd_search_pr = con.annotation.get(namespace="Khoroshevskyi", admin="Khoroshevskyi") +dd_search = con.annotation.get(namespace="dog_namespace") +dd_search_pr = con.annotation.get(namespace="dog_namespace", admin="dog_namespace") dd_search_pr_namespace = con.annotation.get( - query="s", admin=["Khoroshevskyi", "test_11"] + query="s", admin=["dog_namespace", "test_11"] ) -dd_all = con.annotation.get(admin=["Khoroshevskyi", "test_11"]) +dd_all = con.annotation.get(admin=["dog_namespace", "test_11"]) print(dd_list) @@ -112,15 +108,10 @@ def upload_sample_pep_to_db(connection: pepdbagent.PEPDatabaseAgent): ################ # Namespace -ff = con.namespace.get("Khoroshevskyi", admin="Khoroshevskyi") - -print(ff) - - -ff = con.project.get_by_rp("Khoroshevskyi/gse_yaml:default") +ff = con.namespace.get("dog_namespace", admin="dog_namespace") print(ff) -dell = con.project.delete(namespace="Khoroshevskyi", name="dupa", tag="test1") +con.project.update(update_dict={"is_private": False}, namespace="dog_namespace", name="testttt", tag="test1") -con.project.update(update_dict={"is_private": False}, namespace="Khoroshevskyi", name="dupa", tag="test1") +dell = con.project.delete(namespace="dog_namespace", name="testttt", tag="test1") \ No newline at end of file From 6b5576c26bfd732161593ff8fdcc6129b6a02237 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 9 Jun 2023 12:53:16 -0400 Subject: [PATCH 31/32] fixed order by functionality --- pepdbagent/modules/annotation.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index 83ba427..6b7cdf2 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -291,8 +291,6 @@ def _get_projects( pep_schema=result.pep_schema, ) ) - if not order_desc: - results_list.reverse() return results_list @staticmethod @@ -324,9 +322,8 @@ def _add_order_by_keyword( if desc and by == "name": order_by_obj = order_by_obj.desc() - else: - if not desc: - order_by_obj = order_by_obj.desc() + elif by != "name" and not desc: + order_by_obj = order_by_obj.desc() return statement.order_by(order_by_obj) From 94921d31797eb62d61f96f4ae9907b0c31439e83 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 9 Jun 2023 13:09:40 -0400 Subject: [PATCH 32/32] changed release date --- docs/changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 61ffcf6..e1e65e5 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,7 +2,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.4.0] -- 2023-06-01 +## [0.4.0] -- 2023-06-09 - Transitioned to SQLAlchemy ORM. - Added a pep_schema column to the database. - Implemented a new testing approach.