From b26f059a672b89d847ce72c4bdbe7c6607048fbb Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Sat, 30 Jul 2022 14:09:59 -0400 Subject: [PATCH 01/40] fixed #15 --- README.md | 9 ++-- pepagent/pepagent.py | 125 +++++++++++++++++++++++++++++++++---------- 2 files changed, 103 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 06ea77f..f0a76af 100644 --- a/README.md +++ b/README.md @@ -60,15 +60,16 @@ print(projects_list) ``` 6) Get annotation about single project or projects: + ```python # Get dictionary of annotation for multiple projects by namespace -projects_anno_list = projectDB.get_anno(namespace='Test') +projects_anno_list = projectDB.get_project_annotation(namespace='Test') # Get dictionary of annotation for 1 project by id -projects_anno_list = projectDB.get_anno(id='5') +projects_anno_list = projectDB.get_project_annotation(id='5') # Get dictionary of annotation for 1 project by digest -projects_anno_list = projectDB.get_anno(digest='1495b8d5b586ab71c9f3a30dd265b3c3') +projects_anno_list = projectDB.get_project_annotation(digest='1495b8d5b586ab71c9f3a30dd265b3c3') # Get dictionary of annotation for 1 project by registry -projects_anno_list = projectDB.get_anno(digest='Test/subtable3') +projects_anno_list = projectDB.get_project_annotation(digest='Test/subtable3') ``` diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index 1d8222b..cee574e 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -6,9 +6,10 @@ from hashlib import md5 from itertools import chain import ubiquerg +import sys +import os -from pepagent.utils import all_elements_are_strings, is_valid_resgistry_path - +from .utils import all_elements_are_strings, is_valid_resgistry_path from .const import * from .exceptions import SchemaError @@ -73,9 +74,9 @@ def upload_project( """ Upload project to the database :param peppy.Project project: Project object that has to be uploaded to the DB - :param str namespace: namespace of the project (Default: 'other') - :param str name: name of the project (Default: name is taken from the project object) - :param dict anno: dict with annotations about current project + :param namespace: namespace of the project (Default: 'other') + :param name: name of the project (Default: name is taken from the project object) + :param anno: dict with annotations about current project """ cursor = self.postgresConnection.cursor() try: @@ -110,8 +111,7 @@ def upload_project( ) proj_id = cursor.fetchone()[0] - # _LOGGER.info(f"Uploading {proj_name} project!") - print("dsfasdf") + _LOGGER.info(f"Uploading {proj_name} project!") self._commit_connection() cursor.close() _LOGGER.info( @@ -132,11 +132,11 @@ def get_project( ) -> peppy.Project: """ Retrieving project from database by specifying project name or id - :param str registry: project registry - :param str namespace: project registry [should be used with name] - :param str name: project name in database [should be used with namespace] - :param str id: project id in database - :param str digest: project digest in database + :param registry: project registry + :param namespace: project registry [should be used with name] + :param name: project name in database [should be used with namespace] + :param id: project id in database + :param digest: project digest in database :return: peppy object with found project """ sql_q = f""" @@ -174,7 +174,7 @@ def get_project( _LOGGER.warn( f"No project found for supplied input. Did you supply a valid namespace and project? {sql_q}" ) - return None + return peppy.Project() def get_projects( self, @@ -202,7 +202,8 @@ def get_projects( if all( [ not isinstance(registry_paths, str), - # not isinstance(registry_paths, List[str]) <-- want this, but python doesnt support type checking a subscripted generic + # not isinstance(registry_paths, List[str]) <-- want this, + # but python doesnt support type checking a subscripted generic not isinstance(registry_paths, list), ] ): @@ -254,8 +255,8 @@ def get_namespace(self, namespace: str) -> dict: Fetch a particular namespace from the database. This doesnt retrieve full project objects. For that, one should utilize the `get_projects(namespace=...)` function. - :param str namespace: the namespace to fetch - :return dict: A dictionary representation of the namespace in the database + :param namespace: the namespace to fetch + :return: A dictionary representation of the namespace in the database """ sql_q = f"select {ID_COL}, {NAME_COL}, {DIGEST_COL}, {ANNO_COL} from {DB_TABLE_NAME} where namespace = %s" results = self.run_sql_fetchall(sql_q, namespace) @@ -304,7 +305,7 @@ def get_namespaces( return [self.get_namespace(n) for n in namespaces] - def get_anno( + def get_project_annotation( self, registry: str = None, namespace: str = None, @@ -314,12 +315,12 @@ def get_anno( ) -> dict: """ Retrieving project annotation dict by specifying project namespace/name, id, or digest - Additionally can return all namespace project annotations - :param str registry: project registry - :param str namespace: project registry - will return dict of project annotations - :param str name: project name in database [should be used with namespace] - :param str id: project id in database - :param str digest: project digest in database + Additionally you can return all namespace project annotations by specifying only namespace + :param registry: project registry + :param namespace: project registry - will return dict of project annotations + :param name: project name in database [should be used with namespace] + :param id: project id in database + :param digest: project digest in database :return: dict of annotations """ sql_q = f""" @@ -371,7 +372,7 @@ def get_anno( def _get_namespace_proj_anno(self, namespace: str = None) -> dict: """ Get list of all project annotations in namespace - :param str namespace: namespace + :param namespace: namespace return: dict of dicts with all projects in namespace """ @@ -397,10 +398,45 @@ def _get_namespace_proj_anno(self, namespace: str = None) -> dict: return res_dict + def get_namespace_annotation(self, namespace: str = None) -> dict: + """ + Retrieving namespace annotation dict. + If namespace is None it will retrieve dict with all namespace annotations. + :param namespace: project registry + """ + sql_q = f""" + select {NAMESPACE_COL}, count({NAME_COL}) as n_namespace, SUM(({ANNO_COL} ->> 'n_samples')::int) + as n_samples + from {DB_TABLE_NAME} + group by {NAMESPACE_COL}; + """ + result = self.run_sql_fetchall(sql_q) + anno_dict = {} + + for name_sp_result in result: + anno_dict[name_sp_result[0]] = { + "namespace": name_sp_result[0], + "n_namespace": name_sp_result[1], + "n_samples": name_sp_result[2], + } + + if namespace: + try: + return anno_dict[namespace] + except KeyError: + _LOGGER.warning(f"Namespace '{namespace}' was not found.") + return { + "namespace": namespace, + "n_namespace": 0, + "n_samples": 0, + } + + return anno_dict + def run_sql_fetchone(self, sql_query: str, *argv) -> list: """ Fetching one result by providing sql query and arguments - :param str sql_query: sql string that has to run + :param sql_query: sql string that has to run :param argv: arguments that has to be added to sql query :return: set of query result """ @@ -413,7 +449,7 @@ def run_sql_fetchone(self, sql_query: str, *argv) -> list: if output_result is not None: return list(output_result) else: - return None + return [] except psycopg2.Error as e: _LOGGER.error(f"Error occurred while running query: {e}") finally: @@ -441,7 +477,7 @@ def run_sql_fetchall(self, sql_query: str, *argv) -> list: def _create_digest(project_dict: dict) -> str: """ Create digest for PEP project - :param dict project_dict: project dict + :param project_dict: project dict :return: digest string """ _LOGGER.info(f"Creating digest for: {project_dict['name']}") @@ -468,3 +504,38 @@ def _check_conn_db(self) -> None: cols_name.sort() if DB_COLUMNS != cols_name: raise SchemaError + + +def main(): + # Create connection to db: + # projectDB = PepAgent( + # user="postgres", + # password="docker", + # ) + projectDB = PepAgent("postgresql://postgres:docker@localhost:5432/pep-base-sql") + + # Add new projects to database + # directory = "/home/bnt4me/Virginia/pephub_db/sample_pep/" + # os.walk(directory) + # projects = ( + # [os.path.join(x[0], "project_config.yaml") for x in os.walk(directory)] + # )[1:] + # + # print(projects) + # for d in projects: + # try: + # prp_project2 = peppy.Project(d) + # projectDB.upload_project(prp_project2, namespace="King", anno={"sample_anno": "Tony Stark "}) + # except Exception: + # pass + + # dfd = projectDB.get_project_annotation(namespace="other") + print(projectDB.get_namespace_annotation()) + + +if __name__ == "__main__": + try: + sys.exit(main()) + except KeyboardInterrupt: + print("Pipeline aborted.") + sys.exit(1) From adb4ec8f85315175f5a150b26da1c498a4df5dd3 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Sat, 30 Jul 2022 14:13:30 -0400 Subject: [PATCH 02/40] cleaning --- .vscode/settings.json | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 9b38853..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "python.testing.pytestArgs": [ - "tests" - ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true -} \ No newline at end of file From 2bfd5167cc6469a99a0f2faed58e792ebb087c1d Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 8 Aug 2022 22:41:13 -0400 Subject: [PATCH 03/40] tag added to db Fixed #23 --- README.md | 63 +++++++++-- pep_db/pep_db.sql | 8 +- pepagent/const.py | 13 ++- pepagent/pepagent.py | 255 +++++++++++++++++++++++++++++++------------ 4 files changed, 255 insertions(+), 84 deletions(-) diff --git a/README.md b/README.md index f0a76af..3bf64ab 100644 --- a/README.md +++ b/README.md @@ -23,17 +23,26 @@ projectDB = PepAgent("postgresql://postgres:docker@localhost:5432/pep-base-sql") pep_project = peppy.Project("/sample_pep/subtable3/project_config.yaml") # use upload_project function to add this project to the DB projectDB.upload_project(pep_project, namespace = "Test", anno={"project": "annotation_dict"}) -# additionally you can specify name of the project +# additionally you can specify name and tag of the project ``` -3) Get list of available namespaces: +3) Get list of projects in namespace: +```python +list_of_namespaces = projectDB.get_namespace(namespace="King") +print(list_of_namespaces) + +``` + +4) Get list of available namespaces: ```python list_of_namespaces = projectDB.get_namespaces() print(list_of_namespaces) +# To get list with with just names of namespaces set: names=True +# otherwise you will get list with namespaces with information about all projects ``` -4) Get project +5) Get project ```python # Get project by id: pr_ob = projectDB.get_project(id=3) @@ -47,29 +56,59 @@ print(pr_ob.samples) pr_ob = projectDB.get_project(namespace='Test', name='subtable3') print(pr_ob.samples) +# Get project by registry +pr_ob = projectDB.get_project(registry='Test/subtable3:this_is_tag') +print(pr_ob.samples) + +# Get project by namespace and name +pr_ob = projectDB.get_project(namespace='Test', name='subtable3', tag='this_is_tag') +print(pr_ob.samples) + # Get project by digest pr_ob = projectDB.get_project(digest='1495b8d5b586ab71c9f3a30dd265b3c3') print(pr_ob.samples) ``` -5) Get list of all available projects in the namespace +4) Get list of projects ```python -# Get project by id: -projects_list = projectDB.get_project_list('Test') -print(projects_list) +# Get projects by registry +pr_ob = projectDB.get_projects(registry='Test/subtable3') +print(pr_ob.samples) + +# Get projects by list of registries +pr_ob = projectDB.get_projects(registry=['Test/subtable3', 'King/pr25'] ) +print(pr_ob.samples) + +# Get projects by namespace +pr_ob = projectDB.get_projects(namespace='Test') +print(pr_ob.samples) + +# Get project by tag +pr_ob = projectDB.get_project(tag='this_is_tag') +print(pr_ob.samples) + ``` -6) Get annotation about single project or projects: +5) Get annotation about single project or projects: ```python -# Get dictionary of annotation for multiple projects by namespace -projects_anno_list = projectDB.get_project_annotation(namespace='Test') + # Get dictionary of annotation for 1 project by id projects_anno_list = projectDB.get_project_annotation(id='5') # Get dictionary of annotation for 1 project by digest projects_anno_list = projectDB.get_project_annotation(digest='1495b8d5b586ab71c9f3a30dd265b3c3') # Get dictionary of annotation for 1 project by registry -projects_anno_list = projectDB.get_project_annotation(digest='Test/subtable3') - +projects_anno_list = projectDB.get_project_annotation(digest='Test/subtable3:this_is_tag') +# if tag is not set default tag will be set +projects_anno_list = projectDB.get_project_annotation(namespace='Test/subtable3') ``` +6) Get annotations namespace or all namespaces: + +```python +# Get dictionary of annotation for specific namespace +namespace_anno = projectDB.get_namespace_annotation(namespace='Test') + +# Get dictiionary of annotations for all namespaces +namespace_anno_all = projectDB.get_namespace_annotation() +``` diff --git a/pep_db/pep_db.sql b/pep_db/pep_db.sql index 3a64997..671bc4d 100644 --- a/pep_db/pep_db.sql +++ b/pep_db/pep_db.sql @@ -6,11 +6,13 @@ SET client_encoding = 'LATIN1'; CREATE TABLE projects ( - id BIGSERIAL NOT NULL PRIMARY KEY, + id BIGSERIAL NOT NULL, namespace TEXT NOT NULL, name TEXT NOT NULL, + tag TEXT NOT NULL, digest TEXT NOT NULL, -- shoud be changed to CHARACTER - project_value json NOT NULL, - anno_info json -- annotation information + project_value jsonb NOT NULL, + anno_info jsonb, -- annotation information + CONSTRAINT id PRIMARY KEY (namespace, name, tag) ); diff --git a/pepagent/const.py b/pepagent/const.py index adc9ef3..8f8848c 100644 --- a/pepagent/const.py +++ b/pepagent/const.py @@ -4,6 +4,17 @@ ANNO_COL = "anno_info" NAMESPACE_COL = "namespace" NAME_COL = "name" +TAG_COL = "tag" DIGEST_COL = "digest" -DB_COLUMNS = [ID_COL, PROJ_COL, ANNO_COL, NAMESPACE_COL, NAME_COL, DIGEST_COL] +DB_COLUMNS = [ID_COL, + DIGEST_COL, + PROJ_COL, + ANNO_COL, + NAMESPACE_COL, + NAME_COL, + TAG_COL + ] + +DEFAULT_NAMESPACE = "other" +DEFAULT_TAG = "primary" diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index cee574e..5f10545 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -1,5 +1,6 @@ from typing import List, Union import psycopg2 +from psycopg2.errors import UniqueViolation import json import logmuse import peppy @@ -9,9 +10,9 @@ import sys import os -from .utils import all_elements_are_strings, is_valid_resgistry_path -from .const import * -from .exceptions import SchemaError +from utils import all_elements_are_strings, is_valid_resgistry_path +from const import * +from exceptions import SchemaError # from pprint import pprint @@ -69,19 +70,25 @@ def upload_project( project: peppy.Project, namespace: str = None, name: str = None, + tag: str = None, anno: dict = None, + update: bool = False, ) -> None: """ Upload project to the database :param peppy.Project project: Project object that has to be uploaded to the DB :param namespace: namespace of the project (Default: 'other') :param name: name of the project (Default: name is taken from the project object) + :param tag: tag (or version) of the project :param anno: dict with annotations about current project + :param update: boolean value if project hase to be updated """ cursor = self.postgresConnection.cursor() try: if namespace is None: - namespace = "other" + namespace = DEFAULT_NAMESPACE + if tag is None: + tag = DEFAULT_TAG proj_dict = project.to_dict(extended=True) if name: proj_name = name @@ -97,26 +104,52 @@ def upload_project( anno_info = json.dumps(anno_info) proj_dict = json.dumps(proj_dict) - sql = f"""INSERT INTO projects({NAMESPACE_COL}, {NAME_COL}, {DIGEST_COL}, {PROJ_COL}, {ANNO_COL}) - VALUES (%s, %s, %s, %s, %s) RETURNING {ID_COL};""" - cursor.execute( - sql, - ( - namespace, - proj_name, - proj_digest, - proj_dict, - anno_info, - ), - ) + if update and self.check_project_existance(namespace=namespace, name=proj_name, tag=tag): + _LOGGER.info(f"Updating {proj_name} project!") + sql = f"""UPDATE {DB_TABLE_NAME} + SET {DIGEST_COL} = %s, {PROJ_COL}= %s, {ANNO_COL}= %s + WHERE {NAMESPACE_COL} = %s and {NAME_COL} = %s and {TAG_COL} = %s;""" + cursor.execute( + sql, + ( + proj_digest, + proj_dict, + anno_info, + namespace, + proj_name, + tag, + ), + ) + _LOGGER.info("Project has been updated!") - proj_id = cursor.fetchone()[0] - _LOGGER.info(f"Uploading {proj_name} project!") - self._commit_connection() - cursor.close() - _LOGGER.info( - f"Project: {proj_name} was successfully uploaded. The Id of this project is {proj_id}" - ) + else: + try: + _LOGGER.info(f"Uploading {proj_name} project!") + sql = f"""INSERT INTO {DB_TABLE_NAME}({NAMESPACE_COL}, {NAME_COL}, {TAG_COL}, {DIGEST_COL}, {PROJ_COL}, {ANNO_COL}) + VALUES (%s, %s, %s, %s, %s, %s) RETURNING {ID_COL};""" + cursor.execute( + sql, + ( + namespace, + proj_name, + tag, + proj_digest, + proj_dict, + anno_info, + ), + ) + proj_id = cursor.fetchone()[0] + _LOGGER.info( + f"Project: {proj_name} was successfully uploaded. The Id of this project is {proj_id}" + ) + + self._commit_connection() + cursor.close() + + except UniqueViolation: + _LOGGER.warning(f"Namespace, name and tag already exists. Project won't be uploaded. " + f"Solution: Set update value as True (project will be overwritten)," + f" or change tag!") except psycopg2.Error as e: print(f"{e}") @@ -127,6 +160,7 @@ def get_project( registry: str = None, namespace: str = None, name: str = None, + tag: str = None, id: int = None, digest: str = None, ) -> peppy.Project: @@ -146,10 +180,15 @@ def get_project( reg = ubiquerg.parse_registry_path(registry) namespace = reg["namespace"] name = reg["item"] + tag = reg["tag"] - if name is not None and namespace is not None: - sql_q = f""" {sql_q} where {NAME_COL}=%s and {NAMESPACE_COL}=%s;""" - found_prj = self.run_sql_fetchone(sql_q, name, namespace) + if name is not None: + if namespace is None: + namespace = DEFAULT_NAMESPACE + if tag is None: + tag = DEFAULT_TAG + sql_q = f""" {sql_q} where {NAME_COL}=%s and {NAMESPACE_COL}=%s and {TAG_COL}=%s;""" + found_prj = self.run_sql_fetchone(sql_q, name, namespace, tag) elif id is not None: sql_q = f""" {sql_q} where {ID_COL}=%s; """ @@ -166,12 +205,12 @@ def get_project( _LOGGER.info("Files haven't been downloaded, returning empty project") return peppy.Project() - if found_prj is not None: + if found_prj: _LOGGER.info(f"Project has been found: {found_prj[0]}") project_value = found_prj[1] return peppy.Project(project_dict=project_value) else: - _LOGGER.warn( + _LOGGER.warning( f"No project found for supplied input. Did you supply a valid namespace and project? {sql_q}" ) return peppy.Project() @@ -180,30 +219,31 @@ def get_projects( self, registry_paths: Union[str, List[str]] = None, namespace: str = None, + tag: str = None, ) -> List[peppy.Project]: """ Get a list of projects as peppy.Project instances. This function can be used in 3 ways: 1. Get all projects in the database (call empty) 2. Get a list of projects using a list registry paths 3. Get a list of projects in a namespace + 4. Get a list of projects with certain tag (can be used with namespace) - :param Union[str, List[str]] registry_paths: A list of registry paths of the form {namespace}/{project}. - :param str namespace: The namespace to fetch all projects from. - :return List[peppy.Project]: a list of peppy.Project instances for the requested projects. + :param registry_paths: A list of registry paths of the form {namespace}/{name}. + :param namespace: The namespace to fetch all projects from. + :param tag: The tag to fetch all projects from. + :return: a list of peppy.Project instances for the requested projects. """ # Case 1. Fetch all projects in database - if all([registry_paths is None, namespace is None]): + if all([registry_paths is None, namespace is None, tag is None]): sql_q = f"select {NAME_COL}, {PROJ_COL} from {DB_TABLE_NAME}" results = self.run_sql_fetchall(sql_q) # Case 2. fetch list of registry paths - elif registry_paths is not None: + elif registry_paths: # check typing if all( [ not isinstance(registry_paths, str), - # not isinstance(registry_paths, List[str]) <-- want this, - # but python doesnt support type checking a subscripted generic not isinstance(registry_paths, list), ] ): @@ -243,40 +283,60 @@ def get_projects( results = self.run_sql_fetchall(sql_q, *flattened_registries) # Case 3. Get projects by namespace + elif namespace: + if tag: + sql_q = f"select {NAME_COL}, {PROJ_COL} " \ + f"from {DB_TABLE_NAME} " \ + f"where namespace = %s and tag = %s" + results = self.run_sql_fetchall(sql_q, namespace, tag) + else: + sql_q = f"select {NAME_COL}, {PROJ_COL} from {DB_TABLE_NAME} where namespace = %s" + results = self.run_sql_fetchall(sql_q, namespace) + + # Case 4. Get projects by namespace + elif tag: + sql_q = f"select {NAME_COL}, {PROJ_COL} from {DB_TABLE_NAME} where tag = %s" + results = self.run_sql_fetchall(sql_q, tag) + print(results) + else: - sql_q = f"select {NAME_COL}, {PROJ_COL} from {DB_TABLE_NAME} where namespace = %s" - results = self.run_sql_fetchall(sql_q, namespace) + _LOGGER.warning(f"Incorrect input!") + results = [] # extract out the project config dictionary from the query return [peppy.Project(project_dict=p[1]) for p in results] def get_namespace(self, namespace: str) -> dict: """ - Fetch a particular namespace from the database. This doesnt retrieve full project + Fetch a particular namespace from the database. This doesn't retrieve full project objects. For that, one should utilize the `get_projects(namespace=...)` function. :param namespace: the namespace to fetch :return: A dictionary representation of the namespace in the database """ - sql_q = f"select {ID_COL}, {NAME_COL}, {DIGEST_COL}, {ANNO_COL} from {DB_TABLE_NAME} where namespace = %s" - results = self.run_sql_fetchall(sql_q, namespace) - projects = [ - { - "id": p[0], - "name": p[1], - "digest": p[2], - "description": p[3]["proj_description"], - "n_samples": p[3]["n_samples"], + try: + sql_q = f"select {ID_COL}, {NAME_COL}, {TAG_COL}, {DIGEST_COL}, {ANNO_COL} from {DB_TABLE_NAME} where namespace = %s" + results = self.run_sql_fetchall(sql_q, namespace) + projects = [ + { + "id": p[0], + "name": p[1], + "tag": p[2], + "digest": p[3], + "description": p[4]["proj_description"], + "n_samples": p[4]["n_samples"], + } + for p in results + ] + result = { + "namespace": namespace, + "projects": projects, + "n_samples": sum(map(lambda p: p["n_samples"], projects)), + "n_projects": len(projects), } - for p in results - ] - result = { - "namespace": namespace, - "projects": projects, - "n_samples": sum(map(lambda p: p["n_samples"], projects)), - "n_projects": len(projects), - } - return result + return result + except TypeError: + _LOGGER.warning(f"Error occurred while getting data from '{namespace}' namespace") def get_namespaces( self, namespaces: List[str] = None, names_only: bool = False @@ -303,13 +363,21 @@ def get_namespaces( if names_only: return [n[0] for n in namespaces] - return [self.get_namespace(n) for n in namespaces] + namespaces_list = [] + for ns in namespaces: + try: + namespaces_list.append(self.get_namespace(ns)) + except TypeError: + _LOGGER.warning(f"Warning: Error in collecting projects from database. {ns} wasn't collected!") + + return namespaces_list def get_project_annotation( self, registry: str = None, namespace: str = None, name: str = None, + tag: str = None, id: int = None, digest: str = None, ) -> dict: @@ -319,6 +387,7 @@ def get_project_annotation( :param registry: project registry :param namespace: project registry - will return dict of project annotations :param name: project name in database [should be used with namespace] + :param tag: tag of the projects :param id: project id in database :param digest: project digest in database :return: dict of annotations @@ -328,6 +397,7 @@ def get_project_annotation( {ID_COL}, {NAMESPACE_COL}, {NAME_COL}, + {TAG_COL}, {ANNO_COL} from {DB_TABLE_NAME} """ @@ -335,11 +405,16 @@ def get_project_annotation( reg = ubiquerg.parse_registry_path(registry) namespace = reg["namespace"] name = reg["item"] + tag = reg["tag"] - if not name and namespace: + if not name and not tag and namespace: return self._get_namespace_proj_anno(namespace) - if name and namespace: + if name and namespace and tag: + sql_q = f""" {sql_q} where {NAME_COL}=%s and {NAMESPACE_COL}=%s and {TAG_COL}=%s;""" + found_prj = self.run_sql_fetchone(sql_q, name, namespace, tag) + + elif name and namespace: sql_q = f""" {sql_q} where {NAME_COL}=%s and {NAMESPACE_COL}=%s;""" found_prj = self.run_sql_fetchone(sql_q, name, namespace) @@ -347,6 +422,10 @@ def get_project_annotation( sql_q = f""" {sql_q} where {ID_COL}=%s; """ found_prj = self.run_sql_fetchone(sql_q, id) + elif tag: + sql_q = f""" {sql_q} where {TAG_COL}=%s; """ + found_prj = self.run_sql_fetchone(sql_q, tag) + elif digest: sql_q = f""" {sql_q} where {DIGEST_COL}=%s; """ found_prj = self.run_sql_fetchone(sql_q, digest) @@ -393,7 +472,8 @@ def _get_namespace_proj_anno(self, namespace: str = None) -> dict: res_dict[result[2]] = { ID_COL: result[0], NAMESPACE_COL: result[1], - ANNO_COL: result[3], + TAG_COL: result[3], + ANNO_COL: result[4], } return res_dict @@ -405,7 +485,7 @@ def get_namespace_annotation(self, namespace: str = None) -> dict: :param namespace: project registry """ sql_q = f""" - select {NAMESPACE_COL}, count({NAME_COL}) as n_namespace, SUM(({ANNO_COL} ->> 'n_samples')::int) + select {NAMESPACE_COL}, count(DISTINCT {TAG_COL}) as n_tags , count({NAME_COL}) as n_namespace, SUM(({ANNO_COL} ->> 'n_samples')::int) as n_samples from {DB_TABLE_NAME} group by {NAMESPACE_COL}; @@ -416,8 +496,9 @@ def get_namespace_annotation(self, namespace: str = None) -> dict: for name_sp_result in result: anno_dict[name_sp_result[0]] = { "namespace": name_sp_result[0], - "n_namespace": name_sp_result[1], - "n_samples": name_sp_result[2], + "n_tags": name_sp_result[1], + "n_projects": name_sp_result[2], + "n_samples": name_sp_result[3], } if namespace: @@ -427,12 +508,37 @@ def get_namespace_annotation(self, namespace: str = None) -> dict: _LOGGER.warning(f"Namespace '{namespace}' was not found.") return { "namespace": namespace, - "n_namespace": 0, + "n_projects": 0, "n_samples": 0, } return anno_dict + def check_project_existance(self, + registry: str = None, + namespace: str = DEFAULT_NAMESPACE, + name: str = None, + tag: str = DEFAULT_TAG, + ) -> bool: + if registry is not None: + reg = ubiquerg.parse_registry_path(registry, + defaults=[('namespace', DEFAULT_NAMESPACE), + ('item', None), + ('tag', DEFAULT_TAG)]) + namespace = reg["namespace"] + name = reg["item"] + tag = reg["tag"] + sql = f"""SELECT {ID_COL} from {DB_TABLE_NAME} + WHERE {NAMESPACE_COL} = %s AND + {NAME_COL} = %s AND + {TAG_COL} = %s;""" + + if self.run_sql_fetchone(sql, namespace, name, tag): + return True + else: + return False + + def run_sql_fetchone(self, sql_query: str, *argv) -> list: """ Fetching one result by providing sql query and arguments @@ -514,7 +620,10 @@ def main(): # ) projectDB = PepAgent("postgresql://postgres:docker@localhost:5432/pep-base-sql") - # Add new projects to database + #prp_project2 = peppy.Project("/home/bnt4me/Virginia/pephub_db/sample_pep/amendments2/project_config.yaml") + # projectDB.upload_project(prp_project2, namespace="King", anno={"sample_anno": "Tony Stark "}) + + #Add new projects to database # directory = "/home/bnt4me/Virginia/pephub_db/sample_pep/" # os.walk(directory) # projects = ( @@ -525,12 +634,22 @@ def main(): # for d in projects: # try: # prp_project2 = peppy.Project(d) - # projectDB.upload_project(prp_project2, namespace="King", anno={"sample_anno": "Tony Stark "}) + # projectDB.upload_project(prp_project2, namespace="King", tag="new_tag", anno={"sample_anno": "Tony Stark "}) # except Exception: # pass - # dfd = projectDB.get_project_annotation(namespace="other") - print(projectDB.get_namespace_annotation()) + # dfd = projectDB.get_project(registry="King/amendments2") + # print(dfd) + # dfd = projectDB.get_projects(tag="new_tag") + # print(dfd) + # dfd = projectDB.get_namespaces() + # print(dfd) + # dfd = projectDB.get_namespace(namespace="other") + # print(dfd) + + d = projectDB.get_namespace_annotation() + print(d) + #print(projectDB.get_namespace_annotation()) if __name__ == "__main__": From 4e78f7ffa58c3160b7d97560cc5e336891aaced9 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 8 Aug 2022 22:48:55 -0400 Subject: [PATCH 04/40] path fix --- pepagent/pepagent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index 5f10545..967c05e 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -10,9 +10,9 @@ import sys import os -from utils import all_elements_are_strings, is_valid_resgistry_path -from const import * -from exceptions import SchemaError +from .utils import all_elements_are_strings, is_valid_resgistry_path +from .const import * +from .exceptions import SchemaError # from pprint import pprint From 753eeec16eff16764d921fda8901b45820d77084 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 9 Aug 2022 10:29:19 -0400 Subject: [PATCH 05/40] lint --- pepagent/const.py | 9 +------ pepagent/pepagent.py | 60 +++++++++++++++++++++++++++----------------- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/pepagent/const.py b/pepagent/const.py index 8f8848c..98492ae 100644 --- a/pepagent/const.py +++ b/pepagent/const.py @@ -7,14 +7,7 @@ TAG_COL = "tag" DIGEST_COL = "digest" -DB_COLUMNS = [ID_COL, - DIGEST_COL, - PROJ_COL, - ANNO_COL, - NAMESPACE_COL, - NAME_COL, - TAG_COL - ] +DB_COLUMNS = [ID_COL, DIGEST_COL, PROJ_COL, ANNO_COL, NAMESPACE_COL, NAME_COL, TAG_COL] DEFAULT_NAMESPACE = "other" DEFAULT_TAG = "primary" diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index 967c05e..3956031 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -104,7 +104,9 @@ def upload_project( anno_info = json.dumps(anno_info) proj_dict = json.dumps(proj_dict) - if update and self.check_project_existance(namespace=namespace, name=proj_name, tag=tag): + if update and self.check_project_existance( + namespace=namespace, name=proj_name, tag=tag + ): _LOGGER.info(f"Updating {proj_name} project!") sql = f"""UPDATE {DB_TABLE_NAME} SET {DIGEST_COL} = %s, {PROJ_COL}= %s, {ANNO_COL}= %s @@ -147,9 +149,11 @@ def upload_project( cursor.close() except UniqueViolation: - _LOGGER.warning(f"Namespace, name and tag already exists. Project won't be uploaded. " - f"Solution: Set update value as True (project will be overwritten)," - f" or change tag!") + _LOGGER.warning( + f"Namespace, name and tag already exists. Project won't be uploaded. " + f"Solution: Set update value as True (project will be overwritten)," + f" or change tag!" + ) except psycopg2.Error as e: print(f"{e}") @@ -285,9 +289,11 @@ def get_projects( # Case 3. Get projects by namespace elif namespace: if tag: - sql_q = f"select {NAME_COL}, {PROJ_COL} " \ - f"from {DB_TABLE_NAME} " \ - f"where namespace = %s and tag = %s" + sql_q = ( + f"select {NAME_COL}, {PROJ_COL} " + f"from {DB_TABLE_NAME} " + f"where namespace = %s and tag = %s" + ) results = self.run_sql_fetchall(sql_q, namespace, tag) else: sql_q = f"select {NAME_COL}, {PROJ_COL} from {DB_TABLE_NAME} where namespace = %s" @@ -336,7 +342,9 @@ def get_namespace(self, namespace: str) -> dict: } return result except TypeError: - _LOGGER.warning(f"Error occurred while getting data from '{namespace}' namespace") + _LOGGER.warning( + f"Error occurred while getting data from '{namespace}' namespace" + ) def get_namespaces( self, namespaces: List[str] = None, names_only: bool = False @@ -368,7 +376,9 @@ def get_namespaces( try: namespaces_list.append(self.get_namespace(ns)) except TypeError: - _LOGGER.warning(f"Warning: Error in collecting projects from database. {ns} wasn't collected!") + _LOGGER.warning( + f"Warning: Error in collecting projects from database. {ns} wasn't collected!" + ) return namespaces_list @@ -514,17 +524,22 @@ def get_namespace_annotation(self, namespace: str = None) -> dict: return anno_dict - def check_project_existance(self, - registry: str = None, - namespace: str = DEFAULT_NAMESPACE, - name: str = None, - tag: str = DEFAULT_TAG, - ) -> bool: + def check_project_existance( + self, + registry: str = None, + namespace: str = DEFAULT_NAMESPACE, + name: str = None, + tag: str = DEFAULT_TAG, + ) -> bool: if registry is not None: - reg = ubiquerg.parse_registry_path(registry, - defaults=[('namespace', DEFAULT_NAMESPACE), - ('item', None), - ('tag', DEFAULT_TAG)]) + reg = ubiquerg.parse_registry_path( + registry, + defaults=[ + ("namespace", DEFAULT_NAMESPACE), + ("item", None), + ("tag", DEFAULT_TAG), + ], + ) namespace = reg["namespace"] name = reg["item"] tag = reg["tag"] @@ -538,7 +553,6 @@ def check_project_existance(self, else: return False - def run_sql_fetchone(self, sql_query: str, *argv) -> list: """ Fetching one result by providing sql query and arguments @@ -620,10 +634,10 @@ def main(): # ) projectDB = PepAgent("postgresql://postgres:docker@localhost:5432/pep-base-sql") - #prp_project2 = peppy.Project("/home/bnt4me/Virginia/pephub_db/sample_pep/amendments2/project_config.yaml") + # prp_project2 = peppy.Project("/home/bnt4me/Virginia/pephub_db/sample_pep/amendments2/project_config.yaml") # projectDB.upload_project(prp_project2, namespace="King", anno={"sample_anno": "Tony Stark "}) - #Add new projects to database + # Add new projects to database # directory = "/home/bnt4me/Virginia/pephub_db/sample_pep/" # os.walk(directory) # projects = ( @@ -649,7 +663,7 @@ def main(): d = projectDB.get_namespace_annotation() print(d) - #print(projectDB.get_namespace_annotation()) + # print(projectDB.get_namespace_annotation()) if __name__ == "__main__": From 3291ad8136cd89e76ff1035c70d51fea547bf962 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 12 Aug 2022 17:22:23 -0400 Subject: [PATCH 06/40] added last_update info, and upade_project --- .github/workflows/pytest.yml | 2 +- pepagent/pepagent.py | 232 +++++++++++++++++++----------- requirements/requirements-all.txt | 2 +- setup.py | 3 +- 4 files changed, 150 insertions(+), 89 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index ebea15a..764f1fd 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.6", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10"] os: [ubuntu-latest] services: diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index 3956031..a1f6616 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -1,6 +1,6 @@ from typing import List, Union import psycopg2 -from psycopg2.errors import UniqueViolation +from psycopg2.errors import UniqueViolation, NotNullViolation import json import logmuse import peppy @@ -9,16 +9,17 @@ import ubiquerg import sys import os +import datetime from .utils import all_elements_are_strings, is_valid_resgistry_path from .const import * from .exceptions import SchemaError +import coloredlogs # from pprint import pprint - _LOGGER = logmuse.init_logger("pepDB_connector") - +coloredlogs.install(logger=_LOGGER, datefmt="%H:%M:%S", fmt="[%(levelname)s] [%(asctime)s] %(message)s",) class PepAgent: """ @@ -26,13 +27,13 @@ class PepAgent: """ def __init__( - self, - dsn=None, - host="localhost", - port=5432, - database="pep-base-sql", - user=None, - password=None, + self, + dsn=None, + host="localhost", + port=5432, + database="pep-base-sql", + user=None, + password=None, ): _LOGGER.info(f"Initializing connection to {database}...") @@ -66,13 +67,13 @@ def close_connection(self) -> None: self.postgresConnection.close() def upload_project( - self, - project: peppy.Project, - namespace: str = None, - name: str = None, - tag: str = None, - anno: dict = None, - update: bool = False, + self, + project: peppy.Project, + namespace: str = None, + name: str = None, + tag: str = None, + anno: dict = None, + update: bool = False, ) -> None: """ Upload project to the database @@ -98,81 +99,134 @@ def upload_project( anno_info = { "proj_description": proj_dict["description"], "n_samples": len(project.samples), + "last_update": str(datetime.datetime.now()), } if anno: anno_info.update(anno) anno_info = json.dumps(anno_info) proj_dict = json.dumps(proj_dict) - if update and self.check_project_existance( - namespace=namespace, name=proj_name, tag=tag - ): - _LOGGER.info(f"Updating {proj_name} project!") - sql = f"""UPDATE {DB_TABLE_NAME} - SET {DIGEST_COL} = %s, {PROJ_COL}= %s, {ANNO_COL}= %s - WHERE {NAMESPACE_COL} = %s and {NAME_COL} = %s and {TAG_COL} = %s;""" + try: + _LOGGER.info(f"Uploading {proj_name} project...") + sql = f"""INSERT INTO {DB_TABLE_NAME}({NAMESPACE_COL}, {NAME_COL}, {TAG_COL}, {DIGEST_COL}, {PROJ_COL}, {ANNO_COL}) + VALUES (%s, %s, %s, %s, %s, %s) RETURNING {ID_COL};""" cursor.execute( sql, ( - proj_digest, - proj_dict, - anno_info, namespace, proj_name, tag, + proj_digest, + proj_dict, + anno_info, ), ) - _LOGGER.info("Project has been updated!") - - else: - try: - _LOGGER.info(f"Uploading {proj_name} project!") - sql = f"""INSERT INTO {DB_TABLE_NAME}({NAMESPACE_COL}, {NAME_COL}, {TAG_COL}, {DIGEST_COL}, {PROJ_COL}, {ANNO_COL}) - VALUES (%s, %s, %s, %s, %s, %s) RETURNING {ID_COL};""" - cursor.execute( - sql, - ( - namespace, - proj_name, - tag, - proj_digest, - proj_dict, - anno_info, - ), - ) - proj_id = cursor.fetchone()[0] - _LOGGER.info( - f"Project: {proj_name} was successfully uploaded. The Id of this project is {proj_id}" - ) + proj_id = cursor.fetchone()[0] + _LOGGER.info( + f"Project: '{namespace}/{proj_name}:{tag}' was successfully uploaded." + ) - self._commit_connection() - cursor.close() + self._commit_connection() + cursor.close() - except UniqueViolation: + except UniqueViolation: + if update: + self.update_project(namespace=namespace, name=proj_name, tag=tag, project=project, anno=anno) + else: _LOGGER.warning( f"Namespace, name and tag already exists. Project won't be uploaded. " f"Solution: Set update value as True (project will be overwritten)," f" or change tag!" ) + except NotNullViolation: + _LOGGER.error( + f"Name of the project wasn't provided. Project will not be uploaded" + ) except psycopg2.Error as e: - print(f"{e}") + _LOGGER.error( + f"Error while uploading project. Project hasn't ben uploaded!" + ) cursor.close() + def update_project( + self, + project: peppy.Project, + namespace: str = None, + name: str = None, + tag: str = None, + anno: dict = None, + ) -> None: + """ + Upload project to the database + :param peppy.Project project: Project object that has to be uploaded to the DB + :param namespace: namespace of the project (Default: 'other') + :param name: name of the project (Default: name is taken from the project object) + :param tag: tag (or version) of the project + :param anno: dict with annotations about current project + :param update: boolean value if project hase to be updated + """ + cursor = self.postgresConnection.cursor() + if namespace is None: + namespace = DEFAULT_NAMESPACE + if tag is None: + tag = DEFAULT_TAG + proj_dict = project.to_dict(extended=True) + if name: + proj_name = name + else: + proj_name = proj_dict["name"] + + proj_digest = self._create_digest(proj_dict) + anno_info = { + "proj_description": proj_dict["description"], + "n_samples": len(project.samples), + } + if anno: + anno_info.update(anno) + anno_info = json.dumps(anno_info) + proj_dict = json.dumps(proj_dict) + + if self.check_project_existance( + namespace=namespace, name=proj_name, tag=tag + ): + try: + _LOGGER.info(f"Updating {proj_name} project...") + sql = f"""UPDATE {DB_TABLE_NAME} + SET {DIGEST_COL} = %s, {PROJ_COL}= %s, {ANNO_COL}= %s + WHERE {NAMESPACE_COL} = %s and {NAME_COL} = %s and {TAG_COL} = %s;""" + cursor.execute( + sql, + ( + proj_digest, + proj_dict, + anno_info, + namespace, + proj_name, + tag, + ), + ) + _LOGGER.info(f"Project '{namespace}/{proj_name}:{tag}' has been updated!") + except psycopg2.Error: + _LOGGER.error("Error occurred while updating the project!") + else: + _LOGGER.error("Project does not exist! No project will be updated!") + def get_project( - self, - registry: str = None, - namespace: str = None, - name: str = None, - tag: str = None, - id: int = None, - digest: str = None, + self, + registry: str = None, + namespace: str = None, + name: str = None, + tag: str = None, + id: int = None, + digest: str = None, ) -> peppy.Project: """ Retrieving project from database by specifying project name or id :param registry: project registry :param namespace: project registry [should be used with name] :param name: project name in database [should be used with namespace] + :param tag: tag of the project :param id: project id in database :param digest: project digest in database :return: peppy object with found project @@ -220,10 +274,10 @@ def get_project( return peppy.Project() def get_projects( - self, - registry_paths: Union[str, List[str]] = None, - namespace: str = None, - tag: str = None, + self, + registry_paths: Union[str, List[str]] = None, + namespace: str = None, + tag: str = None, ) -> List[peppy.Project]: """ Get a list of projects as peppy.Project instances. This function can be used in 3 ways: @@ -246,10 +300,10 @@ def get_projects( elif registry_paths: # check typing if all( - [ - not isinstance(registry_paths, str), - not isinstance(registry_paths, list), - ] + [ + not isinstance(registry_paths, str), + not isinstance(registry_paths, list), + ] ): raise ValueError( f"Registry paths must be of the type str or List[str]. Supplied: {type(registry_paths)}" @@ -347,7 +401,7 @@ def get_namespace(self, namespace: str) -> dict: ) def get_namespaces( - self, namespaces: List[str] = None, names_only: bool = False + self, namespaces: List[str] = None, names_only: bool = False ) -> list: """ Get list of all available namespaces @@ -383,13 +437,13 @@ def get_namespaces( return namespaces_list def get_project_annotation( - self, - registry: str = None, - namespace: str = None, - name: str = None, - tag: str = None, - id: int = None, - digest: str = None, + self, + registry: str = None, + namespace: str = None, + name: str = None, + tag: str = None, + id: int = None, + digest: str = None, ) -> dict: """ Retrieving project annotation dict by specifying project namespace/name, id, or digest @@ -453,7 +507,8 @@ def get_project_annotation( ID_COL: found_prj[0], NAMESPACE_COL: found_prj[1], NAME_COL: found_prj[2], - ANNO_COL: found_prj[3], + TAG_COL: found_prj[3], + ANNO_COL: found_prj[4], } return anno_dict @@ -525,11 +580,11 @@ def get_namespace_annotation(self, namespace: str = None) -> dict: return anno_dict def check_project_existance( - self, - registry: str = None, - namespace: str = DEFAULT_NAMESPACE, - name: str = None, - tag: str = DEFAULT_TAG, + self, + registry: str = None, + namespace: str = DEFAULT_NAMESPACE, + name: str = None, + tag: str = DEFAULT_TAG, ) -> bool: if registry is not None: reg = ubiquerg.parse_registry_path( @@ -553,6 +608,9 @@ def check_project_existance( else: return False + def check_project_status(self, registry: str = None, namespace: str = None, name: str = None, tag: str = None): + print() + def run_sql_fetchone(self, sql_query: str, *argv) -> list: """ Fetching one result by providing sql query and arguments @@ -625,6 +683,10 @@ def _check_conn_db(self) -> None: if DB_COLUMNS != cols_name: raise SchemaError + def _registry(self, registry: str = None, namespace: str = None, name: str = None, tag: str = None): + pass + + def main(): # Create connection to db: @@ -635,7 +697,7 @@ def main(): projectDB = PepAgent("postgresql://postgres:docker@localhost:5432/pep-base-sql") # prp_project2 = peppy.Project("/home/bnt4me/Virginia/pephub_db/sample_pep/amendments2/project_config.yaml") - # projectDB.upload_project(prp_project2, namespace="King", anno={"sample_anno": "Tony Stark "}) + # projectDB.upload_project(prp_project2, namespace="Date", anno={"sample_anno": "Tony Stark "}) # Add new projects to database # directory = "/home/bnt4me/Virginia/pephub_db/sample_pep/" @@ -648,7 +710,7 @@ def main(): # for d in projects: # try: # prp_project2 = peppy.Project(d) - # projectDB.upload_project(prp_project2, namespace="King", tag="new_tag", anno={"sample_anno": "Tony Stark "}) + # projectDB.upload_project(prp_project2, namespace="other1", anno={"sample_anno": "Tony Stark ", "status": 1}) # except Exception: # pass @@ -661,7 +723,7 @@ def main(): # dfd = projectDB.get_namespace(namespace="other") # print(dfd) - d = projectDB.get_namespace_annotation() + d = projectDB.get_project_annotation(registry="Date/amendments2:primary") print(d) # print(projectDB.get_namespace_annotation()) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 2e18258..71fdfa5 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,4 +1,4 @@ -psycopg2 +psycopg2-binary logmuse peppy @ git+https://github.com/pepkit/peppy.git@dev#egg=peppy ubiquerg>=0.6.2 diff --git a/setup.py b/setup.py index 3459b02..3819048 100644 --- a/setup.py +++ b/setup.py @@ -46,10 +46,9 @@ classifiers=[ "Development Status :: 1 - Planning", "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Topic :: Scientific/Engineering :: Bio-Informatics", ], keywords="project, metadata, bioinformatics, database", From eb2ed86f5ec9776dfa9aa7966c8a63d307c96521 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Aug 2022 12:32:44 -0400 Subject: [PATCH 07/40] Added project status --- README.md | 5 +- pepagent/const.py | 7 ++- pepagent/pepagent.py | 133 +++++++++++++++++++++++++++++++++---------- 3 files changed, 111 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 3bf64ab..3b609c5 100644 --- a/README.md +++ b/README.md @@ -43,13 +43,14 @@ print(list_of_namespaces) ``` 5) Get project + ```python # Get project by id: pr_ob = projectDB.get_project(id=3) print(pr_ob.samples) # Get project by registry -pr_ob = projectDB.get_project(registry='Test/subtable3') +pr_ob = projectDB.get_project(registry_path='Test/subtable3') print(pr_ob.samples) # Get project by namespace and name @@ -57,7 +58,7 @@ pr_ob = projectDB.get_project(namespace='Test', name='subtable3') print(pr_ob.samples) # Get project by registry -pr_ob = projectDB.get_project(registry='Test/subtable3:this_is_tag') +pr_ob = projectDB.get_project(registry_path='Test/subtable3:this_is_tag') print(pr_ob.samples) # Get project by namespace and name diff --git a/pepagent/const.py b/pepagent/const.py index 98492ae..fcff2e6 100644 --- a/pepagent/const.py +++ b/pepagent/const.py @@ -9,5 +9,8 @@ DB_COLUMNS = [ID_COL, DIGEST_COL, PROJ_COL, ANNO_COL, NAMESPACE_COL, NAME_COL, TAG_COL] -DEFAULT_NAMESPACE = "other" -DEFAULT_TAG = "primary" +DEFAULT_NAMESPACE = "_" +DEFAULT_TAG = "default" + +STATUS_KEY = 'status' +DEFAULT_STATUS = "1" diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index a1f6616..e1da759 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -11,9 +11,9 @@ import os import datetime -from .utils import all_elements_are_strings, is_valid_resgistry_path -from .const import * -from .exceptions import SchemaError +from utils import all_elements_are_strings, is_valid_resgistry_path +from const import * +from exceptions import SchemaError import coloredlogs # from pprint import pprint @@ -82,7 +82,7 @@ def upload_project( :param name: name of the project (Default: name is taken from the project object) :param tag: tag (or version) of the project :param anno: dict with annotations about current project - :param update: boolean value if project hase to be updated + :param update: boolean value if existed project has to be updated automatically """ cursor = self.postgresConnection.cursor() try: @@ -95,11 +95,21 @@ def upload_project( proj_name = name else: proj_name = proj_dict["name"] + proj_digest = self._create_digest(proj_dict) + + # adding project status to db: + if STATUS_KEY in anno: + proj_status = anno[STATUS_KEY] + del anno[STATUS_KEY] + else: + proj_status = DEFAULT_STATUS + anno_info = { "proj_description": proj_dict["description"], "n_samples": len(project.samples), "last_update": str(datetime.datetime.now()), + "status": proj_status, } if anno: anno_info.update(anno) @@ -178,18 +188,27 @@ def update_project( proj_name = proj_dict["name"] proj_digest = self._create_digest(proj_dict) + + # adding project status to db: + if STATUS_KEY in anno: + proj_status = anno[STATUS_KEY] + del anno[STATUS_KEY] + else: + proj_status = DEFAULT_STATUS + anno_info = { "proj_description": proj_dict["description"], "n_samples": len(project.samples), + "last_update": str(datetime.datetime.now()), + "status": proj_status, } + if anno: anno_info.update(anno) anno_info = json.dumps(anno_info) proj_dict = json.dumps(proj_dict) - if self.check_project_existance( - namespace=namespace, name=proj_name, tag=tag - ): + if self.check_project_existance(namespace=namespace, name=proj_name, tag=tag): try: _LOGGER.info(f"Updating {proj_name} project...") sql = f"""UPDATE {DB_TABLE_NAME} @@ -214,7 +233,7 @@ def update_project( def get_project( self, - registry: str = None, + registry_path: str = None, namespace: str = None, name: str = None, tag: str = None, @@ -223,8 +242,8 @@ def get_project( ) -> peppy.Project: """ Retrieving project from database by specifying project name or id - :param registry: project registry - :param namespace: project registry [should be used with name] + :param registry_path: project registry_path + :param namespace: project registry_path [should be used with name] :param name: project name in database [should be used with namespace] :param tag: tag of the project :param id: project id in database @@ -234,8 +253,8 @@ def get_project( sql_q = f""" select {ID_COL}, {PROJ_COL} from {DB_TABLE_NAME} """ - if registry is not None: - reg = ubiquerg.parse_registry_path(registry) + if registry_path is not None: + reg = ubiquerg.parse_registry_path(registry_path) namespace = reg["namespace"] name = reg["item"] tag = reg["tag"] @@ -423,7 +442,7 @@ def get_namespaces( sql_q = f"""SELECT DISTINCT {NAMESPACE_COL} FROM {DB_TABLE_NAME};""" namespaces = [n[0] for n in self.run_sql_fetchall(sql_q)] if names_only: - return [n[0] for n in namespaces] + return [n for n in namespaces] namespaces_list = [] for ns in namespaces: @@ -438,7 +457,7 @@ def get_namespaces( def get_project_annotation( self, - registry: str = None, + registry_path: str = None, namespace: str = None, name: str = None, tag: str = None, @@ -448,8 +467,8 @@ def get_project_annotation( """ Retrieving project annotation dict by specifying project namespace/name, id, or digest Additionally you can return all namespace project annotations by specifying only namespace - :param registry: project registry - :param namespace: project registry - will return dict of project annotations + :param registry_path: project registry_path + :param namespace: project registry_path - will return dict of project annotations :param name: project name in database [should be used with namespace] :param tag: tag of the projects :param id: project id in database @@ -465,8 +484,8 @@ def get_project_annotation( {ANNO_COL} from {DB_TABLE_NAME} """ - if registry: - reg = ubiquerg.parse_registry_path(registry) + if registry_path: + reg = ubiquerg.parse_registry_path(registry_path) namespace = reg["namespace"] name = reg["item"] tag = reg["tag"] @@ -547,10 +566,10 @@ def get_namespace_annotation(self, namespace: str = None) -> dict: """ Retrieving namespace annotation dict. If namespace is None it will retrieve dict with all namespace annotations. - :param namespace: project registry + :param namespace: project namespace """ sql_q = f""" - select {NAMESPACE_COL}, count(DISTINCT {TAG_COL}) as n_tags , count({NAME_COL}) as n_namespace, SUM(({ANNO_COL} ->> 'n_samples')::int) + select {NAMESPACE_COL}, count(DISTINCT {TAG_COL}) as n_tags , count({NAME_COL}) as n_namespace, SUM(({ANNO_COL} ->> 'n_samples')::int) as n_samples from {DB_TABLE_NAME} group by {NAMESPACE_COL}; @@ -581,14 +600,23 @@ def get_namespace_annotation(self, namespace: str = None) -> dict: def check_project_existance( self, - registry: str = None, + *, + registry_path: str = None, namespace: str = DEFAULT_NAMESPACE, name: str = None, tag: str = DEFAULT_TAG, ) -> bool: - if registry is not None: + """ + Checking if project exists in the database + :param registry_path: project registry path + :param namespace: project namespace + :param name: project name + :param tag: project tag + :return: Returning True if project exist + """ + if registry_path is not None: reg = ubiquerg.parse_registry_path( - registry, + registry_path, defaults=[ ("namespace", DEFAULT_NAMESPACE), ("item", None), @@ -608,8 +636,56 @@ def check_project_existance( else: return False - def check_project_status(self, registry: str = None, namespace: str = None, name: str = None, tag: str = None): - print() + def check_project_status( + self, + *, + registry_path: str = None, + namespace: str = None, + name: str = None, + tag: str = None, + ) -> str: + """ + Retrieve project status by providing registry path or name, namespace and tag + :param registry_path: project registry + :param namespace: project registry - will return dict of project annotations + :param name: project name in database. [required if registry_path does not specify] + :param tag: tag of the projects + :return: status + """ + sql_q = f""" + select ({ANNO_COL}->>'status') as status + from {DB_TABLE_NAME} + WHERE {NAMESPACE_COL}=%s AND + {NAME_COL}=%s AND {TAG_COL}=%s; + """ + if registry_path: + reg = ubiquerg.parse_registry_path(registry_path) + namespace = reg["namespace"] + name = reg["item"] + tag = reg["tag"] + + if not namespace: + namespace = DEFAULT_NAMESPACE + + if not tag: + tag = DEFAULT_TAG + + if not name: + _LOGGER.error( + "You haven't provided neither registry_path or name! Execution is unsuccessful. " + "Files haven't been downloaded, returning empty dict" + ) + return "None" + + if not self.check_project_existance(namespace=namespace, name=name, tag=tag): + _LOGGER.error( + "Project does not exist, returning None" + ) + return "None" + + result = self.run_sql_fetchone(sql_q, namespace, name, tag) + + return result[0] def run_sql_fetchone(self, sql_query: str, *argv) -> list: """ @@ -683,9 +759,6 @@ def _check_conn_db(self) -> None: if DB_COLUMNS != cols_name: raise SchemaError - def _registry(self, registry: str = None, namespace: str = None, name: str = None, tag: str = None): - pass - def main(): @@ -723,8 +796,8 @@ def main(): # dfd = projectDB.get_namespace(namespace="other") # print(dfd) - d = projectDB.get_project_annotation(registry="Date/amendments2:primary") - print(d) + d = projectDB.check_project_status(registry_path="other1/subtable4:primary") + # print(projectDB.get_namespace_annotation()) From aba3bc9b2a3b4c2664553e6ba7cac00123389f2d Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Aug 2022 13:14:10 -0400 Subject: [PATCH 08/40] refactoring, cleaning --- pepagent/const.py | 2 +- pepagent/pepagent.py | 239 ++++++++++++++++++++++--------------------- 2 files changed, 122 insertions(+), 119 deletions(-) diff --git a/pepagent/const.py b/pepagent/const.py index fcff2e6..e9b2180 100644 --- a/pepagent/const.py +++ b/pepagent/const.py @@ -12,5 +12,5 @@ DEFAULT_NAMESPACE = "_" DEFAULT_TAG = "default" -STATUS_KEY = 'status' +STATUS_KEY = "status" DEFAULT_STATUS = "1" diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index e1da759..f6f04f7 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -11,15 +11,20 @@ import os import datetime -from utils import all_elements_are_strings, is_valid_resgistry_path -from const import * -from exceptions import SchemaError +from .utils import all_elements_are_strings, is_valid_resgistry_path +from .const import * +from .exceptions import SchemaError import coloredlogs # from pprint import pprint _LOGGER = logmuse.init_logger("pepDB_connector") -coloredlogs.install(logger=_LOGGER, datefmt="%H:%M:%S", fmt="[%(levelname)s] [%(asctime)s] %(message)s",) +coloredlogs.install( + logger=_LOGGER, + datefmt="%H:%M:%S", + fmt="[%(levelname)s] [%(asctime)s] %(message)s", +) + class PepAgent: """ @@ -27,13 +32,13 @@ class PepAgent: """ def __init__( - self, - dsn=None, - host="localhost", - port=5432, - database="pep-base-sql", - user=None, - password=None, + self, + dsn=None, + host="localhost", + port=5432, + database="pep-base-sql", + user=None, + password=None, ): _LOGGER.info(f"Initializing connection to {database}...") @@ -67,13 +72,13 @@ def close_connection(self) -> None: self.postgresConnection.close() def upload_project( - self, - project: peppy.Project, - namespace: str = None, - name: str = None, - tag: str = None, - anno: dict = None, - update: bool = False, + self, + project: peppy.Project, + namespace: str = None, + name: str = None, + tag: str = None, + anno: dict = None, + update: bool = False, ) -> None: """ Upload project to the database @@ -141,7 +146,13 @@ def upload_project( except UniqueViolation: if update: - self.update_project(namespace=namespace, name=proj_name, tag=tag, project=project, anno=anno) + self.update_project( + namespace=namespace, + name=proj_name, + tag=tag, + project=project, + anno=anno, + ) else: _LOGGER.warning( f"Namespace, name and tag already exists. Project won't be uploaded. " @@ -160,12 +171,12 @@ def upload_project( cursor.close() def update_project( - self, - project: peppy.Project, - namespace: str = None, - name: str = None, - tag: str = None, - anno: dict = None, + self, + project: peppy.Project, + namespace: str = None, + name: str = None, + tag: str = None, + anno: dict = None, ) -> None: """ Upload project to the database @@ -225,28 +236,29 @@ def update_project( tag, ), ) - _LOGGER.info(f"Project '{namespace}/{proj_name}:{tag}' has been updated!") + _LOGGER.info( + f"Project '{namespace}/{proj_name}:{tag}' has been updated!" + ) except psycopg2.Error: _LOGGER.error("Error occurred while updating the project!") else: _LOGGER.error("Project does not exist! No project will be updated!") def get_project( - self, - registry_path: str = None, - namespace: str = None, - name: str = None, - tag: str = None, - id: int = None, - digest: str = None, + self, + *, + registry_path: str = None, + namespace: str = None, + name: str = None, + tag: str = None, + digest: str = None, ) -> peppy.Project: """ - Retrieving project from database by specifying project name or id + Retrieving project from database by specifying project registry_path, name, or digest :param registry_path: project registry_path - :param namespace: project registry_path [should be used with name] - :param name: project name in database [should be used with namespace] + :param namespace: project registry_path + :param name: project name in database :param tag: tag of the project - :param id: project id in database :param digest: project digest in database :return: peppy object with found project """ @@ -267,19 +279,15 @@ def get_project( sql_q = f""" {sql_q} where {NAME_COL}=%s and {NAMESPACE_COL}=%s and {TAG_COL}=%s;""" found_prj = self.run_sql_fetchone(sql_q, name, namespace, tag) - elif id is not None: - sql_q = f""" {sql_q} where {ID_COL}=%s; """ - found_prj = self.run_sql_fetchone(sql_q, id) - elif digest is not None: sql_q = f""" {sql_q} where {DIGEST_COL}=%s; """ found_prj = self.run_sql_fetchone(sql_q, digest) else: _LOGGER.error( - "You haven't provided neither namespace/name, digest nor id! Execution is unsuccessful" + "You haven't provided neither registry_path, name nor digest! Execution is unsuccessful" + "Files haven't been downloaded, returning empty project" ) - _LOGGER.info("Files haven't been downloaded, returning empty project") return peppy.Project() if found_prj: @@ -293,10 +301,11 @@ def get_project( return peppy.Project() def get_projects( - self, - registry_paths: Union[str, List[str]] = None, - namespace: str = None, - tag: str = None, + self, + *, + registry_paths: Union[str, List[str]] = None, + namespace: str = None, + tag: str = None, ) -> List[peppy.Project]: """ Get a list of projects as peppy.Project instances. This function can be used in 3 ways: @@ -319,10 +328,10 @@ def get_projects( elif registry_paths: # check typing if all( - [ - not isinstance(registry_paths, str), - not isinstance(registry_paths, list), - ] + [ + not isinstance(registry_paths, str), + not isinstance(registry_paths, list), + ] ): raise ValueError( f"Registry paths must be of the type str or List[str]. Supplied: {type(registry_paths)}" @@ -420,10 +429,10 @@ def get_namespace(self, namespace: str) -> dict: ) def get_namespaces( - self, namespaces: List[str] = None, names_only: bool = False + self, namespaces: List[str] = None, names_only: bool = False ) -> list: """ - Get list of all available namespaces + Get list of all available namespaces. :param List[str] namespaces: An optional list of namespaces to fetch. :param bool names_only: Flag to indicate you only want unique namespace names @@ -456,22 +465,20 @@ def get_namespaces( return namespaces_list def get_project_annotation( - self, - registry_path: str = None, - namespace: str = None, - name: str = None, - tag: str = None, - id: int = None, - digest: str = None, + self, + registry_path: str = None, + namespace: str = None, + name: str = None, + tag: str = None, + digest: str = None, ) -> dict: """ - Retrieving project annotation dict by specifying project namespace/name, id, or digest + Retrieving project annotation dict by specifying project name, or digest Additionally you can return all namespace project annotations by specifying only namespace :param registry_path: project registry_path :param namespace: project registry_path - will return dict of project annotations - :param name: project name in database [should be used with namespace] + :param name: project name in database :param tag: tag of the projects - :param id: project id in database :param digest: project digest in database :return: dict of annotations """ @@ -501,10 +508,6 @@ def get_project_annotation( sql_q = f""" {sql_q} where {NAME_COL}=%s and {NAMESPACE_COL}=%s;""" found_prj = self.run_sql_fetchone(sql_q, name, namespace) - elif id: - sql_q = f""" {sql_q} where {ID_COL}=%s; """ - found_prj = self.run_sql_fetchone(sql_q, id) - elif tag: sql_q = f""" {sql_q} where {TAG_COL}=%s; """ found_prj = self.run_sql_fetchone(sql_q, tag) @@ -532,47 +535,19 @@ def get_project_annotation( return anno_dict - def _get_namespace_proj_anno(self, namespace: str = None) -> dict: - """ - Get list of all project annotations in namespace - :param namespace: namespace - return: dict of dicts with all projects in namespace - """ - - if not namespace: - _LOGGER.info(f"No namespace provided... returning empty list") - return {} - - sql_q = f"""select - {ID_COL}, - {NAMESPACE_COL}, - {NAME_COL}, - {ANNO_COL} - from {DB_TABLE_NAME} where namespace='{namespace}';""" - - results = self.run_sql_fetchall(sql_q) - res_dict = {} - for result in results: - res_dict[result[2]] = { - ID_COL: result[0], - NAMESPACE_COL: result[1], - TAG_COL: result[3], - ANNO_COL: result[4], - } - - return res_dict - def get_namespace_annotation(self, namespace: str = None) -> dict: """ - Retrieving namespace annotation dict. + Retrieving namespace annotation dict with number of tags, projects and samples. If namespace is None it will retrieve dict with all namespace annotations. :param namespace: project namespace """ sql_q = f""" - select {NAMESPACE_COL}, count(DISTINCT {TAG_COL}) as n_tags , count({NAME_COL}) as n_namespace, SUM(({ANNO_COL} ->> 'n_samples')::int) - as n_samples - from {DB_TABLE_NAME} - group by {NAMESPACE_COL}; + select {NAMESPACE_COL}, count(DISTINCT {TAG_COL}) as n_tags , + count({NAME_COL}) as + n_namespace, SUM(({ANNO_COL} ->> 'n_samples')::int) + as n_samples + from {DB_TABLE_NAME} + group by {NAMESPACE_COL}; """ result = self.run_sql_fetchall(sql_q) anno_dict = {} @@ -592,19 +567,50 @@ def get_namespace_annotation(self, namespace: str = None) -> dict: _LOGGER.warning(f"Namespace '{namespace}' was not found.") return { "namespace": namespace, + "n_tags": 0, "n_projects": 0, "n_samples": 0, } return anno_dict + def _get_namespace_proj_anno(self, namespace: str = None) -> dict: + """ + Get list of all project annotations in namespace + :param namespace: namespace + return: dict of dicts with all projects in namespace + """ + + if not namespace: + _LOGGER.info(f"No namespace provided... returning empty list") + return {} + + sql_q = f"""select + {ID_COL}, + {NAMESPACE_COL}, + {NAME_COL}, + {ANNO_COL} + from {DB_TABLE_NAME} where namespace='{namespace}';""" + + results = self.run_sql_fetchall(sql_q) + res_dict = {} + for result in results: + res_dict[result[2]] = { + ID_COL: result[0], + NAMESPACE_COL: result[1], + TAG_COL: result[3], + ANNO_COL: result[4], + } + + return res_dict + def check_project_existance( - self, - *, - registry_path: str = None, - namespace: str = DEFAULT_NAMESPACE, - name: str = None, - tag: str = DEFAULT_TAG, + self, + *, + registry_path: str = None, + namespace: str = DEFAULT_NAMESPACE, + name: str = None, + tag: str = DEFAULT_TAG, ) -> bool: """ Checking if project exists in the database @@ -637,12 +643,12 @@ def check_project_existance( return False def check_project_status( - self, - *, - registry_path: str = None, - namespace: str = None, - name: str = None, - tag: str = None, + self, + *, + registry_path: str = None, + namespace: str = None, + name: str = None, + tag: str = None, ) -> str: """ Retrieve project status by providing registry path or name, namespace and tag @@ -678,9 +684,7 @@ def check_project_status( return "None" if not self.check_project_existance(namespace=namespace, name=name, tag=tag): - _LOGGER.error( - "Project does not exist, returning None" - ) + _LOGGER.error("Project does not exist, returning None") return "None" result = self.run_sql_fetchone(sql_q, namespace, name, tag) @@ -760,7 +764,6 @@ def _check_conn_db(self) -> None: raise SchemaError - def main(): # Create connection to db: # projectDB = PepAgent( From 947e3d8228c3664ee600ce88e012991369039d1f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Aug 2022 15:24:20 -0400 Subject: [PATCH 09/40] reformatting get_project --- README.md | 6 ++--- pepagent/pepagent.py | 57 +++++++++++++++++++++++++++++++------------- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 3b609c5..b9c9996 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # pepagent + pep_db -Database and pep_db_agent for storing and processing pep projects +Database and PEPagent for storing and processing pep projects --- ## How to create pep_db: @@ -12,9 +12,9 @@ Complete instruction can be found here: [pep_db](pep_db) 1) Create connection with DB: ```python # 1) By providing credentials and connection information: -projectDB = PepAgent(user="postgres", password="docker",) +projectDB = PEPagent(user="postgres", password="docker",) # 2) or By providing connection string: -projectDB = PepAgent("postgresql://postgres:docker@localhost:5432/pep-base-sql") +projectDB = PEPagent("postgresql://postgres:docker@localhost:5432/pep-base-sql") ``` 2) Add new project to the DB diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index f6f04f7..7af5d8f 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -26,7 +26,7 @@ ) -class PepAgent: +class PEPagent: """ A class to connect to pep-db and upload, download, read and process pep projects. """ @@ -244,32 +244,61 @@ def update_project( else: _LOGGER.error("Project does not exist! No project will be updated!") + def get_project_by_registry(self, registry_path: str = None): + """ + Retrieving project from database by specifying project registry_path + :param registry_path: project registry_path [e.g. namespace/name:tag] + :return: peppy object with found project + """ + if not registry_path: + _LOGGER.error( + "No registry path provided! Returning empty project!" + ) + return peppy.Project() + else: + reg = ubiquerg.parse_registry_path(registry_path) + namespace = reg["namespace"] + name = reg["item"] + tag = reg["tag"] + return self.get_project(namespace=namespace, name=name, tag=tag) + + def get_project_by_digest(self, digest: str = None): + """ + Retrieving project from database by specifying project registry_path + :param registry_path: project registry_path [e.g. namespace/name:tag] + :return: peppy object with found project + """ + sql_q = f""" + select {ID_COL}, {PROJ_COL} from {DB_TABLE_NAME} + """ + if not digest: + _LOGGER.error( + "No digest was provided! Returning empty project!" + ) + return peppy.Project() + else: + sql_q = f""" {sql_q} where {DIGEST_COL}=%s; """ + found_prj = self.run_sql_fetchone(sql_q, digest) + project_value = found_prj[1] + return peppy.Project(project_dict=project_value) + def get_project( self, *, - registry_path: str = None, namespace: str = None, name: str = None, tag: str = None, - digest: str = None, ) -> peppy.Project: """ Retrieving project from database by specifying project registry_path, name, or digest - :param registry_path: project registry_path :param namespace: project registry_path :param name: project name in database :param tag: tag of the project - :param digest: project digest in database :return: peppy object with found project """ sql_q = f""" select {ID_COL}, {PROJ_COL} from {DB_TABLE_NAME} """ - if registry_path is not None: - reg = ubiquerg.parse_registry_path(registry_path) - namespace = reg["namespace"] - name = reg["item"] - tag = reg["tag"] if name is not None: if namespace is None: @@ -279,13 +308,9 @@ def get_project( sql_q = f""" {sql_q} where {NAME_COL}=%s and {NAMESPACE_COL}=%s and {TAG_COL}=%s;""" found_prj = self.run_sql_fetchone(sql_q, name, namespace, tag) - elif digest is not None: - sql_q = f""" {sql_q} where {DIGEST_COL}=%s; """ - found_prj = self.run_sql_fetchone(sql_q, digest) - else: _LOGGER.error( - "You haven't provided neither registry_path, name nor digest! Execution is unsuccessful" + "You haven't provided name! Execution is unsuccessful" "Files haven't been downloaded, returning empty project" ) return peppy.Project() @@ -770,7 +795,7 @@ def main(): # user="postgres", # password="docker", # ) - projectDB = PepAgent("postgresql://postgres:docker@localhost:5432/pep-base-sql") + projectDB = PEPagent("postgresql://postgres:docker@localhost:5432/pep-base-sql") # prp_project2 = peppy.Project("/home/bnt4me/Virginia/pephub_db/sample_pep/amendments2/project_config.yaml") # projectDB.upload_project(prp_project2, namespace="Date", anno={"sample_anno": "Tony Stark "}) From 53d970e1eadf21783160e73988b8f2d1d09a3b70 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 17 Aug 2022 14:23:28 -0400 Subject: [PATCH 10/40] Decoupled functions --- pepagent/pepagent.py | 312 ++++++++++++++++++++++------------------- tests/test_pepagent.py | 12 +- 2 files changed, 169 insertions(+), 155 deletions(-) diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index 7af5d8f..acd866c 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -74,9 +74,9 @@ def close_connection(self) -> None: def upload_project( self, project: peppy.Project, - namespace: str = None, + namespace: str = DEFAULT_NAMESPACE, name: str = None, - tag: str = None, + tag: str = DEFAULT_TAG, anno: dict = None, update: bool = False, ) -> None: @@ -91,10 +91,7 @@ def upload_project( """ cursor = self.postgresConnection.cursor() try: - if namespace is None: - namespace = DEFAULT_NAMESPACE - if tag is None: - tag = DEFAULT_TAG + proj_dict = project.to_dict(extended=True) if name: proj_name = name @@ -173,9 +170,9 @@ def upload_project( def update_project( self, project: peppy.Project, - namespace: str = None, + namespace: str = DEFAULT_NAMESPACE, name: str = None, - tag: str = None, + tag: str = DEFAULT_TAG, anno: dict = None, ) -> None: """ @@ -188,10 +185,7 @@ def update_project( :param update: boolean value if project hase to be updated """ cursor = self.postgresConnection.cursor() - if namespace is None: - namespace = DEFAULT_NAMESPACE - if tag is None: - tag = DEFAULT_TAG + proj_dict = project.to_dict(extended=True) if name: proj_name = name @@ -262,32 +256,12 @@ def get_project_by_registry(self, registry_path: str = None): tag = reg["tag"] return self.get_project(namespace=namespace, name=name, tag=tag) - def get_project_by_digest(self, digest: str = None): - """ - Retrieving project from database by specifying project registry_path - :param registry_path: project registry_path [e.g. namespace/name:tag] - :return: peppy object with found project - """ - sql_q = f""" - select {ID_COL}, {PROJ_COL} from {DB_TABLE_NAME} - """ - if not digest: - _LOGGER.error( - "No digest was provided! Returning empty project!" - ) - return peppy.Project() - else: - sql_q = f""" {sql_q} where {DIGEST_COL}=%s; """ - found_prj = self.run_sql_fetchone(sql_q, digest) - project_value = found_prj[1] - return peppy.Project(project_dict=project_value) - def get_project( self, *, - namespace: str = None, + namespace: str = DEFAULT_NAMESPACE, name: str = None, - tag: str = None, + tag: str = DEFAULT_TAG ) -> peppy.Project: """ Retrieving project from database by specifying project registry_path, name, or digest @@ -301,10 +275,6 @@ def get_project( """ if name is not None: - if namespace is None: - namespace = DEFAULT_NAMESPACE - if tag is None: - tag = DEFAULT_TAG sql_q = f""" {sql_q} where {NAME_COL}=%s and {NAMESPACE_COL}=%s and {TAG_COL}=%s;""" found_prj = self.run_sql_fetchone(sql_q, name, namespace, tag) @@ -327,74 +297,18 @@ def get_project( def get_projects( self, - *, - registry_paths: Union[str, List[str]] = None, namespace: str = None, tag: str = None, ) -> List[peppy.Project]: """ - Get a list of projects as peppy.Project instances. This function can be used in 3 ways: - 1. Get all projects in the database (call empty) - 2. Get a list of projects using a list registry paths - 3. Get a list of projects in a namespace - 4. Get a list of projects with certain tag (can be used with namespace) - - :param registry_paths: A list of registry paths of the form {namespace}/{name}. + Get a list of projects as peppy.Project instances. + Get a list of projects in a namespace :param namespace: The namespace to fetch all projects from. :param tag: The tag to fetch all projects from. :return: a list of peppy.Project instances for the requested projects. """ - # Case 1. Fetch all projects in database - if all([registry_paths is None, namespace is None, tag is None]): - sql_q = f"select {NAME_COL}, {PROJ_COL} from {DB_TABLE_NAME}" - results = self.run_sql_fetchall(sql_q) - - # Case 2. fetch list of registry paths - elif registry_paths: - # check typing - if all( - [ - not isinstance(registry_paths, str), - not isinstance(registry_paths, list), - ] - ): - raise ValueError( - f"Registry paths must be of the type str or List[str]. Supplied: {type(registry_paths)}" - ) - else: - # coerce to list if necessary - if isinstance(registry_paths, str): - registry_paths = [registry_paths] - - # check for valid registry paths - for rpath in registry_paths: - if not is_valid_resgistry_path(rpath): - # should we raise an error or just warn with the logger? - raise ValueError(f"Invalid registry path supplied: '{rpath}'") - - # dynamically build filter for set of registry paths - parametrized_filter = "" - for i in range(len(registry_paths)): - parametrized_filter += "(namespace=%s and name=%s)" - if i < len(registry_paths) - 1: - parametrized_filter += " or " - - sql_q = f"select {NAME_COL}, {PROJ_COL} from {DB_TABLE_NAME} where {parametrized_filter}" - flattened_registries = tuple( - chain( - *[ - [r["namespace"], r["item"]] - for r in map( - lambda rpath: ubiquerg.parse_registry_path(rpath), - registry_paths, - ) - ] - ) - ) - results = self.run_sql_fetchall(sql_q, *flattened_registries) - # Case 3. Get projects by namespace - elif namespace: + if namespace: if tag: sql_q = ( f"select {NAME_COL}, {PROJ_COL} " @@ -419,7 +333,67 @@ def get_projects( # extract out the project config dictionary from the query return [peppy.Project(project_dict=p[1]) for p in results] - def get_namespace(self, namespace: str) -> dict: + def get_projects_by_list( + self, + registry_paths: list, + ) -> List[peppy.Project]: + """ + Get a list of projects as peppy.Project instances. + Get a list of projects in a list of registry_paths + : + :return: a list of peppy.Project instances for the requested projects. + """ + if not isinstance(registry_paths, list): + raise TypeError(f"incorrect variable type provided") + for rpath in registry_paths: + if not is_valid_resgistry_path(rpath): + # should we raise an error or just warn with the logger? + raise ValueError(f"Invalid registry path supplied: '{rpath}'") + + # dynamically build filter for set of registry paths + parametrized_filter = "" + for i in range(len(registry_paths)): + parametrized_filter += "(namespace=%s and name=%s)" + if i < len(registry_paths) - 1: + parametrized_filter += " or " + + sql_q = f"select {NAME_COL}, {PROJ_COL} from {DB_TABLE_NAME} where {parametrized_filter}" + flattened_registries = tuple( + chain( + *[ + [r["namespace"], r["item"]] + for r in map( + lambda rpath: ubiquerg.parse_registry_path(rpath), + registry_paths, + ) + ] + ) + ) + results = self.run_sql_fetchall(sql_q, *flattened_registries) + + # extract out the project config dictionary from the query + return [peppy.Project(project_dict=p[1]) for p in results] + + def get_projects_all( + self, + ) -> List[peppy.Project]: + """ + Get a list of projects as peppy.Project instances. + Get all projects in the database (call empty) + :return: a list of peppy.Project instances for the requested projects. + """ + sql_q = f"select {PROJ_COL} from {DB_TABLE_NAME}" + result = self.run_sql_fetchall(sql_q) + proj_list = [] + + for raw_proj in result: + try: + proj_list.append(peppy.Project(project_dict=raw_proj[0])) + except Exception as err: + _LOGGER.error(f"Exception in {err}") + return proj_list + + def get_namespace_info(self, namespace: str) -> dict: """ Fetch a particular namespace from the database. This doesn't retrieve full project objects. For that, one should utilize the `get_projects(namespace=...)` function. @@ -453,7 +427,7 @@ def get_namespace(self, namespace: str) -> dict: f"Error occurred while getting data from '{namespace}' namespace" ) - def get_namespaces( + def get_namespaces_info( self, namespaces: List[str] = None, names_only: bool = False ) -> list: """ @@ -481,7 +455,7 @@ def get_namespaces( namespaces_list = [] for ns in namespaces: try: - namespaces_list.append(self.get_namespace(ns)) + namespaces_list.append(self.get_namespace_info(ns)) except TypeError: _LOGGER.warning( f"Warning: Error in collecting projects from database. {ns} wasn't collected!" @@ -491,20 +465,16 @@ def get_namespaces( def get_project_annotation( self, - registry_path: str = None, namespace: str = None, name: str = None, tag: str = None, - digest: str = None, ) -> dict: """ - Retrieving project annotation dict by specifying project name, or digest + Retrieving project annotation dict by specifying project name Additionally you can return all namespace project annotations by specifying only namespace - :param registry_path: project registry_path :param namespace: project registry_path - will return dict of project annotations :param name: project name in database :param tag: tag of the projects - :param digest: project digest in database :return: dict of annotations """ sql_q = f""" @@ -516,11 +486,6 @@ def get_project_annotation( {ANNO_COL} from {DB_TABLE_NAME} """ - if registry_path: - reg = ubiquerg.parse_registry_path(registry_path) - namespace = reg["namespace"] - name = reg["item"] - tag = reg["tag"] if not name and not tag and namespace: return self._get_namespace_proj_anno(namespace) @@ -537,10 +502,6 @@ def get_project_annotation( sql_q = f""" {sql_q} where {TAG_COL}=%s; """ found_prj = self.run_sql_fetchone(sql_q, tag) - elif digest: - sql_q = f""" {sql_q} where {DIGEST_COL}=%s; """ - found_prj = self.run_sql_fetchone(sql_q, digest) - else: _LOGGER.error( "You haven't provided neither namespace/name, digest nor id! Execution is unsuccessful" @@ -560,6 +521,24 @@ def get_project_annotation( return anno_dict + def get_project_annotation_by_registry( + self, + registry_path: str, + ) -> dict: + """ + Retrieving project annotation dict by specifying registry path + :param registry_path: project registry_path + + :return: dict of annotations + """ + + reg = ubiquerg.parse_registry_path(registry_path) + namespace = reg["namespace"] + name = reg["item"] + tag = reg["tag"] + + return self.get_project_annotation(namespace=namespace, name=name, tag=tag) + def get_namespace_annotation(self, namespace: str = None) -> dict: """ Retrieving namespace annotation dict with number of tags, projects and samples. @@ -632,31 +611,21 @@ def _get_namespace_proj_anno(self, namespace: str = None) -> dict: def check_project_existance( self, *, - registry_path: str = None, namespace: str = DEFAULT_NAMESPACE, name: str = None, tag: str = DEFAULT_TAG, ) -> bool: """ Checking if project exists in the database - :param registry_path: project registry path :param namespace: project namespace :param name: project name :param tag: project tag :return: Returning True if project exist """ - if registry_path is not None: - reg = ubiquerg.parse_registry_path( - registry_path, - defaults=[ - ("namespace", DEFAULT_NAMESPACE), - ("item", None), - ("tag", DEFAULT_TAG), - ], - ) - namespace = reg["namespace"] - name = reg["item"] - tag = reg["tag"] + if name is None: + _LOGGER.error(f"Name is not specified") + return False + sql = f"""SELECT {ID_COL} from {DB_TABLE_NAME} WHERE {NAMESPACE_COL} = %s AND {NAME_COL} = %s AND @@ -667,17 +636,42 @@ def check_project_existance( else: return False + def check_project_existance_by_registry( + self, + registry_path: str, + ) -> bool: + """ + Checking if project exists in the database + :param registry_path: project registry path + :return: Returning True if project exist + """ + + reg = ubiquerg.parse_registry_path( + registry_path, + defaults=[ + ("namespace", DEFAULT_NAMESPACE), + ("item", None), + ("tag", DEFAULT_TAG), + ], + ) + namespace = reg["namespace"] + name = reg["item"] + tag = reg["tag"] + + if self.check_project_existance(namespace=namespace, name=name, tag=tag): + return True + else: + return False + def check_project_status( self, *, - registry_path: str = None, - namespace: str = None, + namespace: str = DEFAULT_NAMESPACE, name: str = None, - tag: str = None, + tag: str = DEFAULT_TAG, ) -> str: """ - Retrieve project status by providing registry path or name, namespace and tag - :param registry_path: project registry + Retrieve project status by providing name, namespace and tag :param namespace: project registry - will return dict of project annotations :param name: project name in database. [required if registry_path does not specify] :param tag: tag of the projects @@ -689,17 +683,6 @@ def check_project_status( WHERE {NAMESPACE_COL}=%s AND {NAME_COL}=%s AND {TAG_COL}=%s; """ - if registry_path: - reg = ubiquerg.parse_registry_path(registry_path) - namespace = reg["namespace"] - name = reg["item"] - tag = reg["tag"] - - if not namespace: - namespace = DEFAULT_NAMESPACE - - if not tag: - tag = DEFAULT_TAG if not name: _LOGGER.error( @@ -716,6 +699,39 @@ def check_project_status( return result[0] + + def check_project_status_by_registry( + self, + registry_path: str = None, + ) -> str: + """ + Retrieve project status by providing registry path + :param registry_path: project registry + + :return: status + """ + reg = ubiquerg.parse_registry_path(registry_path) + namespace = reg["namespace"] + name = reg["item"] + tag = reg["tag"] + + return self.check_project_status(namespace=namespace, name=name, tag=tag) + + def get_registry_paths_by_digest(self, digest: str): + """ + Get project registry by digest + :param digest: Digest of the project + """ + sql_q = f"select {NAMESPACE_COL}, {NAME_COL}, {TAG_COL} from {DB_TABLE_NAME} where {DIGEST_COL} = %s" + results = self.run_sql_fetchall(sql_q, digest) + + registry_list = [] + + for res in results: + registry_list.append(f"{res[0]}/{res[1]}:{res[2]}") + + return registry_list + def run_sql_fetchone(self, sql_query: str, *argv) -> list: """ Fetching one result by providing sql query and arguments @@ -797,7 +813,10 @@ def main(): # ) projectDB = PEPagent("postgresql://postgres:docker@localhost:5432/pep-base-sql") - # prp_project2 = peppy.Project("/home/bnt4me/Virginia/pephub_db/sample_pep/amendments2/project_config.yaml") + # dd = projectDB.get_registry_paths_by_digest("c39e0d451741b11d3bfdcaa2b1a3c161") + # print(dd) + dd = projectDB.get_projects_all() + print() # projectDB.upload_project(prp_project2, namespace="Date", anno={"sample_anno": "Tony Stark "}) # Add new projects to database @@ -824,11 +843,10 @@ def main(): # dfd = projectDB.get_namespace(namespace="other") # print(dfd) - d = projectDB.check_project_status(registry_path="other1/subtable4:primary") + # d = projectDB.check_project_status(registry_path="other1/subtable4:primary") # print(projectDB.get_namespace_annotation()) - if __name__ == "__main__": try: sys.exit(main()) diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 0ba797d..e5cc516 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -8,7 +8,7 @@ EXAMPLE_NAMESPACES = ["nfcore", "geo", "demo"] -EXAMPLE_REGISTRIES = ["geo/GSE102804", "demo/basic", "nfcore/demo_rna_pep"] +EXAMPLE_REGISTRIES = ["geo/GSE102804:default", "demo/basic:default", "nfcore/demo_rna_pep:default"] class TestDatafetching: @@ -27,19 +27,15 @@ def test_get_project_by_registry(self, registry): assert isinstance(project, peppy.Project) def test_get_projects_by_list(self): - projects = self.db.get_projects(EXAMPLE_REGISTRIES) + projects = self.db.get_projects_by_list(EXAMPLE_REGISTRIES) assert len(projects) == 3 - def test_get_projects_by_registry_path(self): - projects = self.db.get_projects(EXAMPLE_REGISTRIES[0]) - assert len(projects) == 1 - def test_get_projects_by_namespace(self): projects = self.db.get_projects(namespace=EXAMPLE_NAMESPACES[0]) assert len(projects) == 2 def test_get_namespaces(self): - namespaces = self.db.get_namespaces() + namespaces = self.db.get_namespaces_info() assert len(namespaces) > 0 def test_get_namespace_list(self): @@ -48,7 +44,7 @@ def test_get_namespace_list(self): @pytest.mark.parametrize("namespace", EXAMPLE_NAMESPACES) def test_get_namespace(self, namespace: str): - result = self.db.get_namespace(namespace) + result = self.db.get_namespace_info(namespace) assert isinstance(result, dict) assert "projects" in result assert len(result["projects"]) > 0 From ba913d9cdd341fb5704cccb7cb46e8c2146b2f3f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 17 Aug 2022 15:02:15 -0400 Subject: [PATCH 11/40] readme updated + tests --- README.md | 80 +++++++++++++++++++++++++++++------------- pepagent/pepagent.py | 72 +++++++------------------------------ tests/test_pepagent.py | 12 +++---- 3 files changed, 74 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index b9c9996..f7e5445 100644 --- a/README.md +++ b/README.md @@ -25,18 +25,23 @@ pep_project = peppy.Project("/sample_pep/subtable3/project_config.yaml") projectDB.upload_project(pep_project, namespace = "Test", anno={"project": "annotation_dict"}) # additionally you can specify name and tag of the project +# update project + +projectDB.update_project(pep_project, namespace = "Test", anno={"enot": "annotation_dict"}) +# additionally you can specify name and tag of the project + ``` 3) Get list of projects in namespace: ```python -list_of_namespaces = projectDB.get_namespace(namespace="King") +list_of_namespaces = projectDB.get_namespace_info(namespace="King") print(list_of_namespaces) ``` 4) Get list of available namespaces: ```python -list_of_namespaces = projectDB.get_namespaces() +list_of_namespaces = projectDB.get_namespaces_info() print(list_of_namespaces) # To get list with with just names of namespaces set: names=True # otherwise you will get list with namespaces with information about all projects @@ -45,47 +50,45 @@ print(list_of_namespaces) 5) Get project ```python -# Get project by id: -pr_ob = projectDB.get_project(id=3) + +# Get project by registry +pr_ob = projectDB.get_project_by_registry(registry_path='Test/subtable3') print(pr_ob.samples) # Get project by registry -pr_ob = projectDB.get_project(registry_path='Test/subtable3') +pr_ob = projectDB.get_project_by_registry(registry_path='Test/subtable3:this_is_tag') print(pr_ob.samples) # Get project by namespace and name pr_ob = projectDB.get_project(namespace='Test', name='subtable3') print(pr_ob.samples) -# Get project by registry -pr_ob = projectDB.get_project(registry_path='Test/subtable3:this_is_tag') -print(pr_ob.samples) - # Get project by namespace and name pr_ob = projectDB.get_project(namespace='Test', name='subtable3', tag='this_is_tag') print(pr_ob.samples) -# Get project by digest -pr_ob = projectDB.get_project(digest='1495b8d5b586ab71c9f3a30dd265b3c3') -print(pr_ob.samples) ``` 4) Get list of projects ```python -# Get projects by registry -pr_ob = projectDB.get_projects(registry='Test/subtable3') +# Get projects by tag +pr_ob = projectDB.get_projects(tag='new_tag') print(pr_ob.samples) -# Get projects by list of registries -pr_ob = projectDB.get_projects(registry=['Test/subtable3', 'King/pr25'] ) +# Get projects by namespace +pr_ob = projectDB.get_projects(namespace='King') print(pr_ob.samples) -# Get projects by namespace -pr_ob = projectDB.get_projects(namespace='Test') +# Get projects by namespace and tag +pr_ob = projectDB.get_projects(namespace='King', tag='taggg') +print(pr_ob.samples) + +# Get projects by list of registry paths +pr_ob = projectDB.get_projects_by_list(registry_paths=['Test/subtable3:default', 'Test/subtable3:bbb']) print(pr_ob.samples) -# Get project by tag -pr_ob = projectDB.get_project(tag='this_is_tag') +# Get all the projects +pr_ob = projectDB.get_project_all() print(pr_ob.samples) ``` @@ -94,10 +97,6 @@ print(pr_ob.samples) ```python -# Get dictionary of annotation for 1 project by id -projects_anno_list = projectDB.get_project_annotation(id='5') -# Get dictionary of annotation for 1 project by digest -projects_anno_list = projectDB.get_project_annotation(digest='1495b8d5b586ab71c9f3a30dd265b3c3') # Get dictionary of annotation for 1 project by registry projects_anno_list = projectDB.get_project_annotation(digest='Test/subtable3:this_is_tag') # if tag is not set default tag will be set @@ -113,3 +112,36 @@ namespace_anno = projectDB.get_namespace_annotation(namespace='Test') # Get dictiionary of annotations for all namespaces namespace_anno_all = projectDB.get_namespace_annotation() ``` + + +7) Check project existance: + +```python +# by name and namespace: +projectDB.project_exists(namespace="nn", name="buu") + +# by name and namespace and tag: +projectDB.project_exists(namespace="nn", name="buu", tag='dog') + +# by registry path: +projectDB.project_exists_by_registry(registry_path='nn/buu/dog') + +``` + + +8) Check project status: + +```python +# by name and namespace and tag: +# Get dictionary of annotation for specific namespace +projectDB.project_status(namespace="nn", name="buu", tag='dog') + +# by registry path: +projectDB.project_status_by_registry(registry_path='nn/buu/dog') +``` + +9) Get registry paths of all the projects by digest: + +```python +projectDB.get_registry_paths_by_digest(digest='sdafsgwerg243rt2gregw3qr24') +``` \ No newline at end of file diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index acd866c..18e466c 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import List, Union import psycopg2 from psycopg2.errors import UniqueViolation, NotNullViolation @@ -213,7 +215,7 @@ def update_project( anno_info = json.dumps(anno_info) proj_dict = json.dumps(proj_dict) - if self.check_project_existance(namespace=namespace, name=proj_name, tag=tag): + if self.project_exists(namespace=namespace, name=proj_name, tag=tag): try: _LOGGER.info(f"Updating {proj_name} project...") sql = f"""UPDATE {DB_TABLE_NAME} @@ -262,7 +264,7 @@ def get_project( namespace: str = DEFAULT_NAMESPACE, name: str = None, tag: str = DEFAULT_TAG - ) -> peppy.Project: + ) -> Union[peppy.Project | None]: """ Retrieving project from database by specifying project registry_path, name, or digest :param namespace: project registry_path @@ -293,7 +295,7 @@ def get_project( _LOGGER.warning( f"No project found for supplied input. Did you supply a valid namespace and project? {sql_q}" ) - return peppy.Project() + return None def get_projects( self, @@ -608,7 +610,7 @@ def _get_namespace_proj_anno(self, namespace: str = None) -> dict: return res_dict - def check_project_existance( + def project_exists( self, *, namespace: str = DEFAULT_NAMESPACE, @@ -636,7 +638,7 @@ def check_project_existance( else: return False - def check_project_existance_by_registry( + def project_exists_by_registry( self, registry_path: str, ) -> bool: @@ -658,12 +660,12 @@ def check_project_existance_by_registry( name = reg["item"] tag = reg["tag"] - if self.check_project_existance(namespace=namespace, name=name, tag=tag): + if self.project_exists(namespace=namespace, name=name, tag=tag): return True else: return False - def check_project_status( + def project_status( self, *, namespace: str = DEFAULT_NAMESPACE, @@ -691,7 +693,7 @@ def check_project_status( ) return "None" - if not self.check_project_existance(namespace=namespace, name=name, tag=tag): + if not self.project_exists(namespace=namespace, name=name, tag=tag): _LOGGER.error("Project does not exist, returning None") return "None" @@ -700,7 +702,7 @@ def check_project_status( return result[0] - def check_project_status_by_registry( + def project_status_by_registry( self, registry_path: str = None, ) -> str: @@ -715,7 +717,7 @@ def check_project_status_by_registry( name = reg["item"] tag = reg["tag"] - return self.check_project_status(namespace=namespace, name=name, tag=tag) + return self.project_status(namespace=namespace, name=name, tag=tag) def get_registry_paths_by_digest(self, digest: str): """ @@ -803,53 +805,3 @@ def _check_conn_db(self) -> None: cols_name.sort() if DB_COLUMNS != cols_name: raise SchemaError - - -def main(): - # Create connection to db: - # projectDB = PepAgent( - # user="postgres", - # password="docker", - # ) - projectDB = PEPagent("postgresql://postgres:docker@localhost:5432/pep-base-sql") - - # dd = projectDB.get_registry_paths_by_digest("c39e0d451741b11d3bfdcaa2b1a3c161") - # print(dd) - dd = projectDB.get_projects_all() - print() - # projectDB.upload_project(prp_project2, namespace="Date", anno={"sample_anno": "Tony Stark "}) - - # Add new projects to database - # directory = "/home/bnt4me/Virginia/pephub_db/sample_pep/" - # os.walk(directory) - # projects = ( - # [os.path.join(x[0], "project_config.yaml") for x in os.walk(directory)] - # )[1:] - # - # print(projects) - # for d in projects: - # try: - # prp_project2 = peppy.Project(d) - # projectDB.upload_project(prp_project2, namespace="other1", anno={"sample_anno": "Tony Stark ", "status": 1}) - # except Exception: - # pass - - # dfd = projectDB.get_project(registry="King/amendments2") - # print(dfd) - # dfd = projectDB.get_projects(tag="new_tag") - # print(dfd) - # dfd = projectDB.get_namespaces() - # print(dfd) - # dfd = projectDB.get_namespace(namespace="other") - # print(dfd) - - # d = projectDB.check_project_status(registry_path="other1/subtable4:primary") - - # print(projectDB.get_namespace_annotation()) - -if __name__ == "__main__": - try: - sys.exit(main()) - except KeyboardInterrupt: - print("Pipeline aborted.") - sys.exit(1) diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index e5cc516..b1eff76 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -1,7 +1,7 @@ import os import pytest import peppy -from pepagent import PepAgent +from pepagent import PEPagent from dotenv import load_dotenv load_dotenv() @@ -13,17 +13,17 @@ class TestDatafetching: - db = PepAgent( + db = PEPagent( user=os.environ.get("POSTGRES_USER") or "postgres", password=os.environ.get("POSTGRES_PASSWORD") or "docker", ) def test_connection(self): - assert isinstance(self.db, PepAgent) + assert isinstance(self.db, PEPagent) @pytest.mark.parametrize("registry", EXAMPLE_REGISTRIES) def test_get_project_by_registry(self, registry): - project = self.db.get_project(registry) + project = self.db.get_project_by_registry(registry) assert isinstance(project, peppy.Project) def test_get_projects_by_list(self): @@ -39,7 +39,7 @@ def test_get_namespaces(self): assert len(namespaces) > 0 def test_get_namespace_list(self): - namespaces = self.db.get_namespaces(names_only=True) + namespaces = self.db.get_namespaces_info(names_only=True) assert all([isinstance(n, str) for n in namespaces]) @pytest.mark.parametrize("namespace", EXAMPLE_NAMESPACES) @@ -52,5 +52,5 @@ def test_get_namespace(self, namespace: str): def test_nonexistent_project(self): this_registry_doesnt_exist = "blueberry/pancakes" with pytest.warns(): - proj = self.db.get_project(this_registry_doesnt_exist) + proj = self.db.get_project_by_registry(this_registry_doesnt_exist) assert proj is None From fa6029a9195ae85dc66dc4d3c93fb269d52ca3ae Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 17 Aug 2022 15:12:10 -0400 Subject: [PATCH 12/40] lint --- pepagent/pepagent.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index 18e466c..4cfdc87 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -247,9 +247,7 @@ def get_project_by_registry(self, registry_path: str = None): :return: peppy object with found project """ if not registry_path: - _LOGGER.error( - "No registry path provided! Returning empty project!" - ) + _LOGGER.error("No registry path provided! Returning empty project!") return peppy.Project() else: reg = ubiquerg.parse_registry_path(registry_path) @@ -263,7 +261,7 @@ def get_project( *, namespace: str = DEFAULT_NAMESPACE, name: str = None, - tag: str = DEFAULT_TAG + tag: str = DEFAULT_TAG, ) -> Union[peppy.Project | None]: """ Retrieving project from database by specifying project registry_path, name, or digest @@ -701,7 +699,6 @@ def project_status( return result[0] - def project_status_by_registry( self, registry_path: str = None, From 813e28ec5a7ce6ba77567f874a50fcc182977d3e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 18 Aug 2022 12:55:47 -0400 Subject: [PATCH 13/40] registry_path improvement --- docs/changelog.md | 2 +- pepagent/pepagent.py | 29 ++++++++++++++++++++--------- requirements/requirements-all.txt | 2 +- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index b442004..ff6b20d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -3,6 +3,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.1.0] -- 2022-07-19 +## [0.1.0] -- 2022-08-18 - 🎉 first release! \ No newline at end of file diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index 4cfdc87..3cdc0ef 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from typing import List, Union import psycopg2 from psycopg2.errors import UniqueViolation, NotNullViolation @@ -254,15 +252,20 @@ def get_project_by_registry(self, registry_path: str = None): namespace = reg["namespace"] name = reg["item"] tag = reg["tag"] + + if namespace is None: + namespace = DEFAULT_NAMESPACE + if tag is None: + tag = DEFAULT_TAG + return self.get_project(namespace=namespace, name=name, tag=tag) def get_project( self, - *, namespace: str = DEFAULT_NAMESPACE, name: str = None, tag: str = DEFAULT_TAG, - ) -> Union[peppy.Project | None]: + ) -> Union[peppy.Project, None]: """ Retrieving project from database by specifying project registry_path, name, or digest :param namespace: project registry_path @@ -288,7 +291,7 @@ def get_project( if found_prj: _LOGGER.info(f"Project has been found: {found_prj[0]}") project_value = found_prj[1] - return peppy.Project(project_dict=project_value) + return peppy.Project().from_dict(project_value) else: _LOGGER.warning( f"No project found for supplied input. Did you supply a valid namespace and project? {sql_q}" @@ -331,7 +334,7 @@ def get_projects( results = [] # extract out the project config dictionary from the query - return [peppy.Project(project_dict=p[1]) for p in results] + return [peppy.Project().from_dict(p[1]) for p in results] def get_projects_by_list( self, @@ -388,7 +391,7 @@ def get_projects_all( for raw_proj in result: try: - proj_list.append(peppy.Project(project_dict=raw_proj[0])) + proj_list.append(peppy.Project().from_dict(raw_proj[0])) except Exception as err: _LOGGER.error(f"Exception in {err}") return proj_list @@ -537,6 +540,11 @@ def get_project_annotation_by_registry( name = reg["item"] tag = reg["tag"] + if namespace is None: + namespace = DEFAULT_NAMESPACE + if tag is None: + tag = DEFAULT_TAG + return self.get_project_annotation(namespace=namespace, name=name, tag=tag) def get_namespace_annotation(self, namespace: str = None) -> dict: @@ -610,7 +618,6 @@ def _get_namespace_proj_anno(self, namespace: str = None) -> dict: def project_exists( self, - *, namespace: str = DEFAULT_NAMESPACE, name: str = None, tag: str = DEFAULT_TAG, @@ -665,7 +672,6 @@ def project_exists_by_registry( def project_status( self, - *, namespace: str = DEFAULT_NAMESPACE, name: str = None, tag: str = DEFAULT_TAG, @@ -714,6 +720,11 @@ def project_status_by_registry( name = reg["item"] tag = reg["tag"] + if namespace is None: + namespace = DEFAULT_NAMESPACE + if tag is None: + tag = DEFAULT_TAG + return self.project_status(namespace=namespace, name=name, tag=tag) def get_registry_paths_by_digest(self, digest: str): diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 71fdfa5..0a073c9 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,4 +1,4 @@ psycopg2-binary logmuse -peppy @ git+https://github.com/pepkit/peppy.git@dev#egg=peppy +peppy>=0.34.0 ubiquerg>=0.6.2 From 710d7edf56db1201693e6ff662491b198c1596a4 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 18 Aug 2022 13:11:18 -0400 Subject: [PATCH 14/40] PR comments fix --- README.md | 12 +++++++----- pepagent/pepagent.py | 10 +++++----- tests/test_pepagent.py | 8 ++++---- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index f7e5445..772b88b 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,9 @@ print(list_of_namespaces) ``` 4) Get list of available namespaces: + ```python -list_of_namespaces = projectDB.get_namespaces_info() +list_of_namespaces = projectDB.get_namespaces_info_by_list() print(list_of_namespaces) # To get list with with just names of namespaces set: names=True # otherwise you will get list with namespaces with information about all projects @@ -70,21 +71,22 @@ print(pr_ob.samples) ``` 4) Get list of projects + ```python # Get projects by tag -pr_ob = projectDB.get_projects(tag='new_tag') +pr_ob = projectDB.get_projects_in_namespace(tag='new_tag') print(pr_ob.samples) # Get projects by namespace -pr_ob = projectDB.get_projects(namespace='King') +pr_ob = projectDB.get_projects_in_namespace(namespace='King') print(pr_ob.samples) # Get projects by namespace and tag -pr_ob = projectDB.get_projects(namespace='King', tag='taggg') +pr_ob = projectDB.get_projects_in_namespace(namespace='King', tag='taggg') print(pr_ob.samples) # Get projects by list of registry paths -pr_ob = projectDB.get_projects_by_list(registry_paths=['Test/subtable3:default', 'Test/subtable3:bbb']) +pr_ob = projectDB.get_projects_in_list(registry_paths=['Test/subtable3:default', 'Test/subtable3:bbb']) print(pr_ob.samples) # Get all the projects diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index 3cdc0ef..4bf6453 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -163,7 +163,7 @@ def upload_project( except psycopg2.Error as e: _LOGGER.error( - f"Error while uploading project. Project hasn't ben uploaded!" + f"Error while uploading project. Project hasn't been uploaded!" ) cursor.close() @@ -298,7 +298,7 @@ def get_project( ) return None - def get_projects( + def get_projects_in_namespace( self, namespace: str = None, tag: str = None, @@ -336,7 +336,7 @@ def get_projects( # extract out the project config dictionary from the query return [peppy.Project().from_dict(p[1]) for p in results] - def get_projects_by_list( + def get_projects_in_list( self, registry_paths: list, ) -> List[peppy.Project]: @@ -377,7 +377,7 @@ def get_projects_by_list( # extract out the project config dictionary from the query return [peppy.Project(project_dict=p[1]) for p in results] - def get_projects_all( + def get_all_projects( self, ) -> List[peppy.Project]: """ @@ -430,7 +430,7 @@ def get_namespace_info(self, namespace: str) -> dict: f"Error occurred while getting data from '{namespace}' namespace" ) - def get_namespaces_info( + def get_namespaces_info_by_list( self, namespaces: List[str] = None, names_only: bool = False ) -> list: """ diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index b1eff76..0200f1d 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -27,19 +27,19 @@ def test_get_project_by_registry(self, registry): assert isinstance(project, peppy.Project) def test_get_projects_by_list(self): - projects = self.db.get_projects_by_list(EXAMPLE_REGISTRIES) + projects = self.db.get_projects_in_list(EXAMPLE_REGISTRIES) assert len(projects) == 3 def test_get_projects_by_namespace(self): - projects = self.db.get_projects(namespace=EXAMPLE_NAMESPACES[0]) + projects = self.db.get_projects_in_namespace(namespace=EXAMPLE_NAMESPACES[0]) assert len(projects) == 2 def test_get_namespaces(self): - namespaces = self.db.get_namespaces_info() + namespaces = self.db.get_namespaces_info_by_list() assert len(namespaces) > 0 def test_get_namespace_list(self): - namespaces = self.db.get_namespaces_info(names_only=True) + namespaces = self.db.get_namespaces_info_by_list(names_only=True) assert all([isinstance(n, str) for n in namespaces]) @pytest.mark.parametrize("namespace", EXAMPLE_NAMESPACES) From 4e0964562d2e5cb7193daba83f5ba54bf6d5034f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 18 Aug 2022 13:14:01 -0400 Subject: [PATCH 15/40] lint --- tests/test_pepagent.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 0200f1d..36a6f6a 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -8,7 +8,11 @@ EXAMPLE_NAMESPACES = ["nfcore", "geo", "demo"] -EXAMPLE_REGISTRIES = ["geo/GSE102804:default", "demo/basic:default", "nfcore/demo_rna_pep:default"] +EXAMPLE_REGISTRIES = [ + "geo/GSE102804:default", + "demo/basic:default", + "nfcore/demo_rna_pep:default", +] class TestDatafetching: From ef675686b3854a1120566b3f7e2e2178f7472a69 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 18 Aug 2022 14:05:18 -0400 Subject: [PATCH 16/40] minor changes --- pepagent/pepagent.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index 4bf6453..73c404c 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -353,7 +353,6 @@ def get_projects_in_list( # should we raise an error or just warn with the logger? raise ValueError(f"Invalid registry path supplied: '{rpath}'") - # dynamically build filter for set of registry paths parametrized_filter = "" for i in range(len(registry_paths)): parametrized_filter += "(namespace=%s and name=%s)" @@ -549,7 +548,8 @@ def get_project_annotation_by_registry( def get_namespace_annotation(self, namespace: str = None) -> dict: """ - Retrieving namespace annotation dict with number of tags, projects and samples. + Retrieving namespace annotation dict. + Data that will be retrieved: number of tags, projects and samples If namespace is None it will retrieve dict with all namespace annotations. :param namespace: project namespace """ @@ -675,7 +675,7 @@ def project_status( namespace: str = DEFAULT_NAMESPACE, name: str = None, tag: str = DEFAULT_TAG, - ) -> str: + ) -> Union[str, None]: """ Retrieve project status by providing name, namespace and tag :param namespace: project registry - will return dict of project annotations @@ -701,9 +701,11 @@ def project_status( _LOGGER.error("Project does not exist, returning None") return "None" - result = self.run_sql_fetchone(sql_q, namespace, name, tag) - - return result[0] + try: + result = self.run_sql_fetchone(sql_q, namespace, name, tag)[0] + except IndexError: + return None + return result def project_status_by_registry( self, From 686cbb395c8938a59e60d894e4b3b53c32312d43 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 18 Aug 2022 14:06:50 -0400 Subject: [PATCH 17/40] minor changes 2 --- pepagent/pepagent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pepagent/pepagent.py b/pepagent/pepagent.py index 73c404c..b3f9b53 100644 --- a/pepagent/pepagent.py +++ b/pepagent/pepagent.py @@ -704,7 +704,7 @@ def project_status( try: result = self.run_sql_fetchone(sql_q, namespace, name, tag)[0] except IndexError: - return None + return "Unknown" return result def project_status_by_registry( From c5b013d0a1ed8891b6be1436089933aa1c134e72 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 22 Aug 2022 13:15:23 -0400 Subject: [PATCH 18/40] renaming --- README.md | 7 +++++-- {pepagent => pepdbagent}/__init__.py | 2 +- {pepagent => pepdbagent}/_version.py | 0 {pepagent => pepdbagent}/const.py | 0 {pepagent => pepdbagent}/exceptions.py | 0 pepagent/pepagent.py => pepdbagent/pepdbagent.py | 2 +- {pepagent => pepdbagent}/utils.py | 0 requirements/requirements-all.txt | 1 + tests/test_pepagent.py | 6 +++--- 9 files changed, 11 insertions(+), 7 deletions(-) rename {pepagent => pepdbagent}/__init__.py (70%) rename {pepagent => pepdbagent}/_version.py (100%) rename {pepagent => pepdbagent}/const.py (100%) rename {pepagent => pepdbagent}/exceptions.py (100%) rename pepagent/pepagent.py => pepdbagent/pepdbagent.py (99%) rename {pepagent => pepdbagent}/utils.py (100%) diff --git a/README.md b/README.md index 772b88b..a3074b2 100644 --- a/README.md +++ b/README.md @@ -9,12 +9,15 @@ Complete instruction can be found here: [pep_db](pep_db) --- ## How to use pepagent +```python +from pepdbagent import Connection +``` 1) Create connection with DB: ```python # 1) By providing credentials and connection information: -projectDB = PEPagent(user="postgres", password="docker",) +projectDB = Connection(user="postgres", password="docker",) # 2) or By providing connection string: -projectDB = PEPagent("postgresql://postgres:docker@localhost:5432/pep-base-sql") +projectDB = Connection("postgresql://postgres:docker@localhost:5432/pep-base-sql") ``` 2) Add new project to the DB diff --git a/pepagent/__init__.py b/pepdbagent/__init__.py similarity index 70% rename from pepagent/__init__.py rename to pepdbagent/__init__.py index 8a5e676..0ac0b52 100644 --- a/pepagent/__init__.py +++ b/pepdbagent/__init__.py @@ -1,3 +1,3 @@ """ Package-level data """ -from .pepagent import * +from .pepdbagent import * from ._version import __version__ diff --git a/pepagent/_version.py b/pepdbagent/_version.py similarity index 100% rename from pepagent/_version.py rename to pepdbagent/_version.py diff --git a/pepagent/const.py b/pepdbagent/const.py similarity index 100% rename from pepagent/const.py rename to pepdbagent/const.py diff --git a/pepagent/exceptions.py b/pepdbagent/exceptions.py similarity index 100% rename from pepagent/exceptions.py rename to pepdbagent/exceptions.py diff --git a/pepagent/pepagent.py b/pepdbagent/pepdbagent.py similarity index 99% rename from pepagent/pepagent.py rename to pepdbagent/pepdbagent.py index b3f9b53..21a5c24 100644 --- a/pepagent/pepagent.py +++ b/pepdbagent/pepdbagent.py @@ -26,7 +26,7 @@ ) -class PEPagent: +class Connection: """ A class to connect to pep-db and upload, download, read and process pep projects. """ diff --git a/pepagent/utils.py b/pepdbagent/utils.py similarity index 100% rename from pepagent/utils.py rename to pepdbagent/utils.py diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 0a073c9..3a01dd7 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -2,3 +2,4 @@ psycopg2-binary logmuse peppy>=0.34.0 ubiquerg>=0.6.2 +coloredlogs>=15.0.1 diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 36a6f6a..9128995 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -1,7 +1,7 @@ import os import pytest import peppy -from pepagent import PEPagent +from pepdbagent import Connection from dotenv import load_dotenv load_dotenv() @@ -17,13 +17,13 @@ class TestDatafetching: - db = PEPagent( + db = Connection( user=os.environ.get("POSTGRES_USER") or "postgres", password=os.environ.get("POSTGRES_PASSWORD") or "docker", ) def test_connection(self): - assert isinstance(self.db, PEPagent) + assert isinstance(self.db, Connection) @pytest.mark.parametrize("registry", EXAMPLE_REGISTRIES) def test_get_project_by_registry(self, registry): From 4f114bb4bc28334c19848d25111ac1e8ee876579 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 22 Aug 2022 13:55:10 -0400 Subject: [PATCH 19/40] documentation --- README.md | 157 +++----------------------------------------- docs/README.md | 0 docs/db_tutorial.md | 17 +++++ docs/tutorial.md | 154 +++++++++++++++++++++++++++++++++++++++++++ pep_db/README.md | 18 ----- 5 files changed, 181 insertions(+), 165 deletions(-) create mode 100644 docs/README.md create mode 100644 docs/db_tutorial.md create mode 100644 docs/tutorial.md delete mode 100644 pep_db/README.md diff --git a/README.md b/README.md index a3074b2..24300ee 100644 --- a/README.md +++ b/README.md @@ -1,152 +1,15 @@ -# pepagent + pep_db +# pepdbagent -Database and PEPagent for storing and processing pep projects +pepdbagent is a python library and toolkit that gives a user user-friendly +interface to connect and retrieve information from pep-db. ---- -## How to create pep_db: +It provides a various comprehensive functions that enables user to retrieve +projects, annotations, sets and other information from pep-db. -Complete instruction can be found here: [pep_db](pep_db) +**pep-db** is and postgres database created for storing [PEPs](http://pep.databio.org/en/latest/). +**pep-db** is a backend database for PEPhub. It enables storing huge projects and provides fast speed. ---- -## How to use pepagent -```python -from pepdbagent import Connection -``` -1) Create connection with DB: -```python -# 1) By providing credentials and connection information: -projectDB = Connection(user="postgres", password="docker",) -# 2) or By providing connection string: -projectDB = Connection("postgresql://postgres:docker@localhost:5432/pep-base-sql") -``` +Here you can find more information with tutorials: -2) Add new project to the DB -```python -# initiate peppy Project -pep_project = peppy.Project("/sample_pep/subtable3/project_config.yaml") -# use upload_project function to add this project to the DB -projectDB.upload_project(pep_project, namespace = "Test", anno={"project": "annotation_dict"}) -# additionally you can specify name and tag of the project - -# update project - -projectDB.update_project(pep_project, namespace = "Test", anno={"enot": "annotation_dict"}) -# additionally you can specify name and tag of the project - -``` - -3) Get list of projects in namespace: -```python -list_of_namespaces = projectDB.get_namespace_info(namespace="King") -print(list_of_namespaces) - -``` - -4) Get list of available namespaces: - -```python -list_of_namespaces = projectDB.get_namespaces_info_by_list() -print(list_of_namespaces) -# To get list with with just names of namespaces set: names=True -# otherwise you will get list with namespaces with information about all projects -``` - -5) Get project - -```python - -# Get project by registry -pr_ob = projectDB.get_project_by_registry(registry_path='Test/subtable3') -print(pr_ob.samples) - -# Get project by registry -pr_ob = projectDB.get_project_by_registry(registry_path='Test/subtable3:this_is_tag') -print(pr_ob.samples) - -# Get project by namespace and name -pr_ob = projectDB.get_project(namespace='Test', name='subtable3') -print(pr_ob.samples) - -# Get project by namespace and name -pr_ob = projectDB.get_project(namespace='Test', name='subtable3', tag='this_is_tag') -print(pr_ob.samples) - -``` - -4) Get list of projects - -```python -# Get projects by tag -pr_ob = projectDB.get_projects_in_namespace(tag='new_tag') -print(pr_ob.samples) - -# Get projects by namespace -pr_ob = projectDB.get_projects_in_namespace(namespace='King') -print(pr_ob.samples) - -# Get projects by namespace and tag -pr_ob = projectDB.get_projects_in_namespace(namespace='King', tag='taggg') -print(pr_ob.samples) - -# Get projects by list of registry paths -pr_ob = projectDB.get_projects_in_list(registry_paths=['Test/subtable3:default', 'Test/subtable3:bbb']) -print(pr_ob.samples) - -# Get all the projects -pr_ob = projectDB.get_project_all() -print(pr_ob.samples) - -``` - -5) Get annotation about single project or projects: - -```python - -# Get dictionary of annotation for 1 project by registry -projects_anno_list = projectDB.get_project_annotation(digest='Test/subtable3:this_is_tag') -# if tag is not set default tag will be set -projects_anno_list = projectDB.get_project_annotation(namespace='Test/subtable3') -``` - -6) Get annotations namespace or all namespaces: - -```python -# Get dictionary of annotation for specific namespace -namespace_anno = projectDB.get_namespace_annotation(namespace='Test') - -# Get dictiionary of annotations for all namespaces -namespace_anno_all = projectDB.get_namespace_annotation() -``` - - -7) Check project existance: - -```python -# by name and namespace: -projectDB.project_exists(namespace="nn", name="buu") - -# by name and namespace and tag: -projectDB.project_exists(namespace="nn", name="buu", tag='dog') - -# by registry path: -projectDB.project_exists_by_registry(registry_path='nn/buu/dog') - -``` - - -8) Check project status: - -```python -# by name and namespace and tag: -# Get dictionary of annotation for specific namespace -projectDB.project_status(namespace="nn", name="buu", tag='dog') - -# by registry path: -projectDB.project_status_by_registry(registry_path='nn/buu/dog') -``` - -9) Get registry paths of all the projects by digest: - -```python -projectDB.get_registry_paths_by_digest(digest='sdafsgwerg243rt2gregw3qr24') -``` \ No newline at end of file +- [pep-db installation](./docs/db_tutorial.md) +- [pedbagent](./docs.tutorial.md) \ No newline at end of file diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/db_tutorial.md b/docs/db_tutorial.md new file mode 100644 index 0000000..8fd00a2 --- /dev/null +++ b/docs/db_tutorial.md @@ -0,0 +1,17 @@ +# pep_db + +### pep_db installation: + + +0) Go to [pep_db](../pep_db) directory and then run the following lines +1) Build the docker: `docker build -t pep-db ./` +2) Run the docker: `docker run --name pep-db -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=docker -p 5432:5432 -d pep-db` +3) Start it: `docker start pep-db` + +Now db is installed + +### How to connect to the docker: + +`docker exec -it 65f bash` + +`psql -U postgres -d pep-db` \ No newline at end of file diff --git a/docs/tutorial.md b/docs/tutorial.md new file mode 100644 index 0000000..32af69c --- /dev/null +++ b/docs/tutorial.md @@ -0,0 +1,154 @@ +# pepagent + pep_db + +Database and PEPagent for storing and processing pep projects + +--- +## How to create pep_db: + +Complete instruction can be found here: [pep_db](pep_db) + +--- +## Brief pephubdb tutorial: + +0) Import Connection from pepdbagent: +```python +from pepdbagent import Connection +``` +1) Create connection with DB: +```python +# 1) By providing credentials and connection information: +projectDB = Connection(user="postgres", password="docker",) +# 2) or By providing connection string: +projectDB = Connection("postgresql://postgres:docker@localhost:5432/pep-base-sql") +``` + +2) Add new project to the DB +```python +# initiate peppy Project +pep_project = peppy.Project("/sample_pep/subtable3/project_config.yaml") +# use upload_project function to add this project to the DB +projectDB.upload_project(pep_project, namespace = "Test", anno={"project": "annotation_dict"}) +# additionally you can specify name and tag of the project + +# update project + +projectDB.update_project(pep_project, namespace = "Test", anno={"enot": "annotation_dict"}) +# additionally you can specify name and tag of the project + +``` + +3) Get list of projects in namespace: +```python +list_of_namespaces = projectDB.get_namespace_info(namespace="King") +print(list_of_namespaces) + +``` + +4) Get list of available namespaces: + +```python +list_of_namespaces = projectDB.get_namespaces_info_by_list() +print(list_of_namespaces) +# To get list with with just names of namespaces set: names=True +# otherwise you will get list with namespaces with information about all projects +``` + +5) Get project + +```python + +# Get project by registry +pr_ob = projectDB.get_project_by_registry(registry_path='Test/subtable3') +print(pr_ob.samples) + +# Get project by registry +pr_ob = projectDB.get_project_by_registry(registry_path='Test/subtable3:this_is_tag') +print(pr_ob.samples) + +# Get project by namespace and name +pr_ob = projectDB.get_project(namespace='Test', name='subtable3') +print(pr_ob.samples) + +# Get project by namespace and name +pr_ob = projectDB.get_project(namespace='Test', name='subtable3', tag='this_is_tag') +print(pr_ob.samples) + +``` + +4) Get list of projects + +```python +# Get projects by tag +pr_ob = projectDB.get_projects_in_namespace(tag='new_tag') +print(pr_ob.samples) + +# Get projects by namespace +pr_ob = projectDB.get_projects_in_namespace(namespace='King') +print(pr_ob.samples) + +# Get projects by namespace and tag +pr_ob = projectDB.get_projects_in_namespace(namespace='King', tag='taggg') +print(pr_ob.samples) + +# Get projects by list of registry paths +pr_ob = projectDB.get_projects_in_list(registry_paths=['Test/subtable3:default', 'Test/subtable3:bbb']) +print(pr_ob.samples) + +# Get all the projects +pr_ob = projectDB.get_project_all() +print(pr_ob.samples) + +``` + +5) Get annotation about single project or projects: + +```python + +# Get dictionary of annotation for 1 project by registry +projects_anno_list = projectDB.get_project_annotation(digest='Test/subtable3:this_is_tag') +# if tag is not set default tag will be set +projects_anno_list = projectDB.get_project_annotation(namespace='Test/subtable3') +``` + +6) Get annotations namespace or all namespaces: + +```python +# Get dictionary of annotation for specific namespace +namespace_anno = projectDB.get_namespace_annotation(namespace='Test') + +# Get dictiionary of annotations for all namespaces +namespace_anno_all = projectDB.get_namespace_annotation() +``` + + +7) Check project existance: + +```python +# by name and namespace: +projectDB.project_exists(namespace="nn", name="buu") + +# by name and namespace and tag: +projectDB.project_exists(namespace="nn", name="buu", tag='dog') + +# by registry path: +projectDB.project_exists_by_registry(registry_path='nn/buu/dog') + +``` + + +8) Check project status: + +```python +# by name and namespace and tag: +# Get dictionary of annotation for specific namespace +projectDB.project_status(namespace="nn", name="buu", tag='dog') + +# by registry path: +projectDB.project_status_by_registry(registry_path='nn/buu/dog') +``` + +9) Get registry paths of all the projects by digest: + +```python +projectDB.get_registry_paths_by_digest(digest='sdafsgwerg243rt2gregw3qr24') +``` \ No newline at end of file diff --git a/pep_db/README.md b/pep_db/README.md deleted file mode 100644 index b1d602f..0000000 --- a/pep_db/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# pep_db - -### How to create new postgres db: - - -https://dev.to/andre347/how-to-easily-create-a-postgres-database-in-docker-4moj - - -0) Go into this directory and then run the following lines -1) docker build -t pep-base-sql ./ -2) docker run --name pep-base-sql -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=docker -p 5432:5432 -d pep-base-sql -3) docker start pep-base-sql - - -### How to connect to the docker - -docker exec -it 65f bash -psql -U postgres -d pep-base-sql \ No newline at end of file From 72759e67ebf9a5e55b3800947a5698b6f8f9b931 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 22 Aug 2022 20:34:33 -0400 Subject: [PATCH 20/40] setup --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 3819048..ad5aea8 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ import os from setuptools import find_packages, setup -PACKAGE_NAME = "pepagent" +PACKAGE_NAME = "pepdbagent" # Ordinary dependencies DEPENDENCIES = [] @@ -52,7 +52,7 @@ "Topic :: Scientific/Engineering :: Bio-Informatics", ], keywords="project, metadata, bioinformatics, database", - url="https://github.com/pepkit/peppy/", + url="https://github.com/pepkit/pepdbagent/", author="Oleksandr Khoroshevskyi", # license="", include_package_data=True, From 873dad42345bcea73d0fc9da11223adfac090106 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 22 Aug 2022 20:36:33 -0400 Subject: [PATCH 21/40] naming --- pep_db/Dockerfile | 2 +- pepdbagent/pepdbagent.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pep_db/Dockerfile b/pep_db/Dockerfile index 638be36..4343fbb 100644 --- a/pep_db/Dockerfile +++ b/pep_db/Dockerfile @@ -1,5 +1,5 @@ FROM postgres ENV POSTGRES_USER postgres ENV POSTGRES_PASSWORD docker -ENV POSTGRES_DB pep-base-sql +ENV POSTGRES_DB pep-db COPY pep_db.sql /docker-entrypoint-initdb.d/ \ No newline at end of file diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index 21a5c24..301b79a 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -36,7 +36,7 @@ def __init__( dsn=None, host="localhost", port=5432, - database="pep-base-sql", + database="pep-db", user=None, password=None, ): From f0df9f58e95f03e1513383871384fc4aa05d4044 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 23 Aug 2022 10:22:15 -0400 Subject: [PATCH 22/40] digest fix --- README.md | 2 +- pepdbagent/pepdbagent.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 24300ee..f86791d 100644 --- a/README.md +++ b/README.md @@ -12,4 +12,4 @@ projects, annotations, sets and other information from pep-db. Here you can find more information with tutorials: - [pep-db installation](./docs/db_tutorial.md) -- [pedbagent](./docs.tutorial.md) \ No newline at end of file +- [pedbagent](./docs/tutorial.md) \ No newline at end of file diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index 301b79a..403562a 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -7,8 +7,6 @@ from hashlib import md5 from itertools import chain import ubiquerg -import sys -import os import datetime from .utils import all_elements_are_strings, is_valid_resgistry_path @@ -17,6 +15,8 @@ import coloredlogs # from pprint import pprint +# import sys +# import os _LOGGER = logmuse.init_logger("pepDB_connector") coloredlogs.install( @@ -100,6 +100,9 @@ def upload_project( proj_digest = self._create_digest(proj_dict) + if anno is None: + anno = {} + # adding project status to db: if STATUS_KEY in anno: proj_status = anno[STATUS_KEY] @@ -793,7 +796,7 @@ def _create_digest(project_dict: dict) -> str: """ _LOGGER.info(f"Creating digest for: {project_dict['name']}") sample_digest = md5( - json.dumps(project_dict["_samples"], sort_keys=True).encode("utf-8") + json.dumps(project_dict["_sample_df"], sort_keys=True).encode("utf-8") ).hexdigest() return sample_digest From 0f3bcb8bf0c8898c8fa8a2bdd8b95ae75e760791 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 30 Aug 2022 12:12:53 -0400 Subject: [PATCH 23/40] Fixed #31 --- docs/tutorial.md | 2 +- pepdbagent/const.py | 2 ++ pepdbagent/pepdbagent.py | 2 +- requirements/requirements-all.txt | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/tutorial.md b/docs/tutorial.md index 32af69c..910aa5a 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -19,7 +19,7 @@ from pepdbagent import Connection # 1) By providing credentials and connection information: projectDB = Connection(user="postgres", password="docker",) # 2) or By providing connection string: -projectDB = Connection("postgresql://postgres:docker@localhost:5432/pep-base-sql") +projectDB = Connection("postgresql://postgres:docker@localhost:5432/pep-db") ``` 2) Add new project to the DB diff --git a/pepdbagent/const.py b/pepdbagent/const.py index e9b2180..c2899c5 100644 --- a/pepdbagent/const.py +++ b/pepdbagent/const.py @@ -14,3 +14,5 @@ STATUS_KEY = "status" DEFAULT_STATUS = "1" + +from peppy.const import SAMPLE_RAW_DICT_KEY, SUBSAMPLE_RAW_DICT_KEY \ No newline at end of file diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index 403562a..9fe86ea 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -796,7 +796,7 @@ def _create_digest(project_dict: dict) -> str: """ _LOGGER.info(f"Creating digest for: {project_dict['name']}") sample_digest = md5( - json.dumps(project_dict["_sample_df"], sort_keys=True).encode("utf-8") + json.dumps(project_dict[SAMPLE_RAW_DICT_KEY], sort_keys=True).encode("utf-8") ).hexdigest() return sample_digest diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 3a01dd7..604102b 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,5 +1,5 @@ psycopg2-binary logmuse -peppy>=0.34.0 +peppy>=0.35.0 ubiquerg>=0.6.2 coloredlogs>=15.0.1 From 921b46516ba552caae05afaf56cb03e7751a82b7 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 30 Aug 2022 12:38:52 -0400 Subject: [PATCH 24/40] Fixed #30 --- pepdbagent/pepdbagent.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index 9fe86ea..ab4a8bf 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -13,6 +13,7 @@ from .const import * from .exceptions import SchemaError import coloredlogs +from urllib.parse import urlparse # from pprint import pprint # import sys @@ -44,6 +45,7 @@ def __init__( if dsn is not None: self.postgresConnection = psycopg2.connect(dsn) + self.db_name = urlparse(dsn).path[1:] else: self.postgresConnection = psycopg2.connect( host=host, @@ -52,6 +54,7 @@ def __init__( user=user, password=password, ) + self.db_name = database # Ensure data is added to the database immediately after write commands self.postgresConnection.autocommit = True @@ -818,3 +821,11 @@ def _check_conn_db(self) -> None: cols_name.sort() if DB_COLUMNS != cols_name: raise SchemaError + + def __exit__(self): + self.close_connection() + + def __str__(self): + return f"Connection to the database: '{self.db_name}' is set!" + + From 4bf9632b5b5dcc74a066dbafe4493cdbd053c912 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 30 Aug 2022 18:20:27 -0400 Subject: [PATCH 25/40] Added annotation class --- pepdbagent/const.py | 14 +++- pepdbagent/pepannot.py | 164 +++++++++++++++++++++++++++++++++++++ pepdbagent/pepdbagent.py | 173 +++++++++++++++++++++------------------ 3 files changed, 269 insertions(+), 82 deletions(-) create mode 100644 pepdbagent/pepannot.py diff --git a/pepdbagent/const.py b/pepdbagent/const.py index c2899c5..0dc451e 100644 --- a/pepdbagent/const.py +++ b/pepdbagent/const.py @@ -13,6 +13,16 @@ DEFAULT_TAG = "default" STATUS_KEY = "status" -DEFAULT_STATUS = "1" +DESCRIPTION_KEY = "description" +N_SAMPLES_KEY = "n_samples" +UPDATE_DATE_KEY = "last_update" +DEFAULT_STATUS = "Unknown" -from peppy.const import SAMPLE_RAW_DICT_KEY, SUBSAMPLE_RAW_DICT_KEY \ No newline at end of file +BASE_ANNOTATION_DICT = { + STATUS_KEY: DEFAULT_STATUS, + DESCRIPTION_KEY: None, + N_SAMPLES_KEY: None, + UPDATE_DATE_KEY: None, +} + +from peppy.const import SAMPLE_RAW_DICT_KEY, SUBSAMPLE_RAW_DICT_KEY diff --git a/pepdbagent/pepannot.py b/pepdbagent/pepannot.py new file mode 100644 index 0000000..77bcd04 --- /dev/null +++ b/pepdbagent/pepannot.py @@ -0,0 +1,164 @@ +from const import ( + STATUS_KEY, + DESCRIPTION_KEY, + N_SAMPLES_KEY, + UPDATE_DATE_KEY, + BASE_ANNOTATION_DICT, + DEFAULT_STATUS, +) +import json + + +class Annotation(dict): + """ + A class to model an annotations used in pep-db + """ + + def __init__(self, registry: str = None, annotation_dict: dict = None): + + super(Annotation, self).__init__() + if annotation_dict is None: + annotation_dict = BASE_ANNOTATION_DICT + self.registry = registry + + self.annotation_dict = annotation_dict + + for dict_key in annotation_dict.keys(): + self[dict_key] = annotation_dict[dict_key] + + for dict_key in BASE_ANNOTATION_DICT: + if dict_key not in self: + self[dict_key] = BASE_ANNOTATION_DICT[dict_key] + + self._status = None + self._description = None + self._n_samples = None + self._last_update = None + self._property_setter(annotation_dict) + + @classmethod + def init_empty_annotation(cls): + """ + Initiate empty annotation + :return: Annotation + """ + init_dict = BASE_ANNOTATION_DICT + return Annotation(annotation_dict=init_dict) + + @classmethod + def create_new_annotation( + cls, + status: str = None, + last_update: str = None, + n_samples: int = None, + description: str = None, + anno_dict: dict = None, + ): + """ + Create a new annotation for pep-db + :param status: pep status + :param last_update: pep last update + :param n_samples: number of samples in pep + :param description: description of PEP + :param anno_dict: other + :return: Annotation class + """ + new_dict = BASE_ANNOTATION_DICT + if status: + new_dict[STATUS_KEY] = status + else: + new_dict[STATUS_KEY] = DEFAULT_STATUS + if last_update: + new_dict[UPDATE_DATE_KEY] = last_update + if n_samples: + new_dict[N_SAMPLES_KEY] = n_samples + if description: + new_dict[DESCRIPTION_KEY] = description + if anno_dict: + for dict_key in anno_dict.keys(): + new_dict[dict_key] = anno_dict[dict_key] + + return Annotation(annotation_dict=new_dict) + + def _property_setter(self, annot_dict: dict): + """ + Initialization of setters from annot_dict + :param annot_dict: Annotation dict + """ + if STATUS_KEY in annot_dict: + self.status = annot_dict[STATUS_KEY] + else: + self.status = "Unknown" + + if DESCRIPTION_KEY in annot_dict: + self.description = annot_dict[DESCRIPTION_KEY] + else: + self.description = "" + + if N_SAMPLES_KEY in annot_dict: + self.n_samples = annot_dict[N_SAMPLES_KEY] + else: + self.n_samples = None + + if UPDATE_DATE_KEY in annot_dict: + self.last_update = annot_dict[UPDATE_DATE_KEY] + else: + self.last_update = None + + def get_json(self): + return json.dumps(dict(self)) + + @property + def status(self) -> str: + return self._status + + @status.setter + def status(self, value): + self._status = value + self[STATUS_KEY] = value + + @property + def description(self) -> str: + return self._description + + @description.setter + def description(self, description: str): + self._description = description + self[DESCRIPTION_KEY] = description + + @property + def n_samples(self) -> str: + return self._n_samples + + @n_samples.setter + def n_samples(self, n_samples: str): + self._n_samples = n_samples + self[N_SAMPLES_KEY] = n_samples + + @property + def last_update(self) -> str: + return self._last_update + + @last_update.setter + def last_update(self, last_update: str): + self._last_update = last_update + self[UPDATE_DATE_KEY] = last_update + + @property + def registry(self) -> str: + return self.__registry + + @registry.setter + def registry(self, registry: str): + self.__registry = registry + + def __str__(self): + return f"This is annotation of the project: '{self.registry}'. \nAnnotations: \n{dict(self)}" + + def __dict__(self): + return dict(self) + + +bbb = Annotation(registry="asdfasdf", annotation_dict={}) +print(bbb.get_json()) +print(bbb) diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index ab4a8bf..0993689 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -12,6 +12,8 @@ from .utils import all_elements_are_strings, is_valid_resgistry_path from .const import * from .exceptions import SchemaError +from .pepannot import Annotation + import coloredlogs from urllib.parse import urlparse @@ -80,6 +82,8 @@ def upload_project( namespace: str = DEFAULT_NAMESPACE, name: str = None, tag: str = DEFAULT_TAG, + status: str = None, + description: str = None, anno: dict = None, update: bool = False, ) -> None: @@ -89,6 +93,8 @@ def upload_project( :param namespace: namespace of the project (Default: 'other') :param name: name of the project (Default: name is taken from the project object) :param tag: tag (or version) of the project + :param status: status of the project + :param description: description of the project :param anno: dict with annotations about current project :param update: boolean value if existed project has to be updated automatically """ @@ -103,25 +109,15 @@ def upload_project( proj_digest = self._create_digest(proj_dict) - if anno is None: - anno = {} - - # adding project status to db: - if STATUS_KEY in anno: - proj_status = anno[STATUS_KEY] - del anno[STATUS_KEY] - else: - proj_status = DEFAULT_STATUS + # creating annotation: + proj_annot = Annotation().create_new_annotation( + status=status, + description=description, + last_update=str(datetime.datetime.now()), + n_samples=len(project.samples), + anno_dict=anno, + ) - anno_info = { - "proj_description": proj_dict["description"], - "n_samples": len(project.samples), - "last_update": str(datetime.datetime.now()), - "status": proj_status, - } - if anno: - anno_info.update(anno) - anno_info = json.dumps(anno_info) proj_dict = json.dumps(proj_dict) try: @@ -136,7 +132,7 @@ def upload_project( tag, proj_digest, proj_dict, - anno_info, + proj_annot.get_json(), ), ) proj_id = cursor.fetchone()[0] @@ -154,7 +150,7 @@ def upload_project( name=proj_name, tag=tag, project=project, - anno=anno, + anno=proj_annot, ) else: _LOGGER.warning( @@ -476,10 +472,9 @@ def get_project_annotation( namespace: str = None, name: str = None, tag: str = None, - ) -> dict: + ) -> Annotation: """ Retrieving project annotation dict by specifying project name - Additionally you can return all namespace project annotations by specifying only namespace :param namespace: project registry_path - will return dict of project annotations :param name: project name in database :param tag: tag of the projects @@ -487,52 +482,41 @@ def get_project_annotation( """ sql_q = f""" select - {ID_COL}, {NAMESPACE_COL}, {NAME_COL}, {TAG_COL}, {ANNO_COL} from {DB_TABLE_NAME} """ + if namespace is None: + namespace = DEFAULT_NAMESPACE + if tag is None: + tag = DEFAULT_TAG - if not name and not tag and namespace: - return self._get_namespace_proj_anno(namespace) - - if name and namespace and tag: + if name: sql_q = f""" {sql_q} where {NAME_COL}=%s and {NAMESPACE_COL}=%s and {TAG_COL}=%s;""" found_prj = self.run_sql_fetchone(sql_q, name, namespace, tag) - elif name and namespace: - sql_q = f""" {sql_q} where {NAME_COL}=%s and {NAMESPACE_COL}=%s;""" - found_prj = self.run_sql_fetchone(sql_q, name, namespace) - - elif tag: - sql_q = f""" {sql_q} where {TAG_COL}=%s; """ - found_prj = self.run_sql_fetchone(sql_q, tag) - else: _LOGGER.error( - "You haven't provided neither namespace/name, digest nor id! Execution is unsuccessful" + "You haven't provided name, digest nor id! Execution is unsuccessful" ) _LOGGER.info("Files haven't been downloaded, returning empty dict") - return {} + return Annotation() - _LOGGER.info(f"Project has been found: {found_prj[0]}") + _LOGGER.info(f"Project has been found!") - anno_dict = { - ID_COL: found_prj[0], - NAMESPACE_COL: found_prj[1], - NAME_COL: found_prj[2], - TAG_COL: found_prj[3], - ANNO_COL: found_prj[4], - } + annot = Annotation( + registry=f"{found_prj[0]}/{found_prj[1]}:{found_prj[2]}", + annotation_dict=found_prj[3], + ) - return anno_dict + return annot def get_project_annotation_by_registry( self, registry_path: str, - ) -> dict: + ) -> Annotation: """ Retrieving project annotation dict by specifying registry path :param registry_path: project registry_path @@ -552,6 +536,65 @@ def get_project_annotation_by_registry( return self.get_project_annotation(namespace=namespace, name=name, tag=tag) + def get_projects_annotation_by_namespace(self, namespace: str) -> dict: + """ + Get list of all project annotations in namespace + :param namespace: namespace + return: dict of dicts with all projects in namespace + """ + + if not namespace: + _LOGGER.info(f"No namespace provided... returning empty list") + return {} + + sql_q = f"""select + {NAME_COL}, + {NAMESPACE_COL}, + {TAG_COL}, + {ANNO_COL} + from {DB_TABLE_NAME} where {NAMESPACE_COL}=%s;""" + + results = self.run_sql_fetchall(sql_q, namespace) + res_dict = {} + for result in results: + dict_key = f"{result[1]}/{result[0]}:{result[2]}" + res_dict[dict_key] = Annotation( + registry=dict_key, annotation_dict=result[3] + ) + + return res_dict + + def get_projects_annotation_by_namespace_tag( + self, namespace: str, tag: str + ) -> dict: + """ + Get list of all project annotations in namespace + :param tag: tag of the project + :param namespace: namespace + return: dict of dicts with all projects in namespace + """ + + if not namespace: + _LOGGER.info(f"No namespace provided... returning empty list") + return {} + + sql_q = f"""select + {NAME_COL}, + {NAMESPACE_COL}, + {TAG_COL}, + {ANNO_COL} + from {DB_TABLE_NAME} where namespace=%s and tag=%s;""" + + results = self.run_sql_fetchall(sql_q, namespace, tag) + res_dict = {} + for result in results: + dict_key = f"{result[1]}/{result[0]}:{result[2]}" + res_dict[dict_key] = Annotation( + registry=dict_key, annotation_dict=result[3] + ) + + return res_dict + def get_namespace_annotation(self, namespace: str = None) -> dict: """ Retrieving namespace annotation dict. @@ -592,36 +635,6 @@ def get_namespace_annotation(self, namespace: str = None) -> dict: return anno_dict - def _get_namespace_proj_anno(self, namespace: str = None) -> dict: - """ - Get list of all project annotations in namespace - :param namespace: namespace - return: dict of dicts with all projects in namespace - """ - - if not namespace: - _LOGGER.info(f"No namespace provided... returning empty list") - return {} - - sql_q = f"""select - {ID_COL}, - {NAMESPACE_COL}, - {NAME_COL}, - {ANNO_COL} - from {DB_TABLE_NAME} where namespace='{namespace}';""" - - results = self.run_sql_fetchall(sql_q) - res_dict = {} - for result in results: - res_dict[result[2]] = { - ID_COL: result[0], - NAMESPACE_COL: result[1], - TAG_COL: result[3], - ANNO_COL: result[4], - } - - return res_dict - def project_exists( self, namespace: str = DEFAULT_NAMESPACE, @@ -799,7 +812,9 @@ def _create_digest(project_dict: dict) -> str: """ _LOGGER.info(f"Creating digest for: {project_dict['name']}") sample_digest = md5( - json.dumps(project_dict[SAMPLE_RAW_DICT_KEY], sort_keys=True).encode("utf-8") + json.dumps(project_dict[SAMPLE_RAW_DICT_KEY], sort_keys=True).encode( + "utf-8" + ) ).hexdigest() return sample_digest @@ -827,5 +842,3 @@ def __exit__(self): def __str__(self): return f"Connection to the database: '{self.db_name}' is set!" - - From bc7c4d3ef3e235ea680e739742dcd99582bce99f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 30 Aug 2022 19:09:17 -0400 Subject: [PATCH 26/40] Fixed #32 --- pepdbagent/pepdbagent.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index 0993689..d3c6312 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -288,12 +288,17 @@ def get_project( "You haven't provided name! Execution is unsuccessful" "Files haven't been downloaded, returning empty project" ) - return peppy.Project() + return None if found_prj: _LOGGER.info(f"Project has been found: {found_prj[0]}") project_value = found_prj[1] - return peppy.Project().from_dict(project_value) + try: + project_obj = peppy.Project().from_dict(project_value) + return project_obj + except Exception: + _LOGGER.error(f"Error in init project. Error occurred in peppy. Project id={found_prj[0]}") + return None else: _LOGGER.warning( f"No project found for supplied input. Did you supply a valid namespace and project? {sql_q}" @@ -304,7 +309,7 @@ def get_projects_in_namespace( self, namespace: str = None, tag: str = None, - ) -> List[peppy.Project]: + ) -> list: """ Get a list of projects as peppy.Project instances. Get a list of projects in a namespace @@ -316,18 +321,18 @@ def get_projects_in_namespace( if namespace: if tag: sql_q = ( - f"select {NAME_COL}, {PROJ_COL} " + f"select {ID_COL}, {PROJ_COL} " f"from {DB_TABLE_NAME} " f"where namespace = %s and tag = %s" ) results = self.run_sql_fetchall(sql_q, namespace, tag) else: - sql_q = f"select {NAME_COL}, {PROJ_COL} from {DB_TABLE_NAME} where namespace = %s" + sql_q = f"select {ID_COL}, {PROJ_COL} from {DB_TABLE_NAME} where namespace = %s" results = self.run_sql_fetchall(sql_q, namespace) # Case 4. Get projects by namespace elif tag: - sql_q = f"select {NAME_COL}, {PROJ_COL} from {DB_TABLE_NAME} where tag = %s" + sql_q = f"select {ID_COL}, {PROJ_COL} from {DB_TABLE_NAME} where tag = %s" results = self.run_sql_fetchall(sql_q, tag) print(results) @@ -336,7 +341,14 @@ def get_projects_in_namespace( results = [] # extract out the project config dictionary from the query - return [peppy.Project().from_dict(p[1]) for p in results] + result_list = [] + for p in results: + try: + result_list.append(peppy.Project().from_dict(p[1])) + except Exception: + _LOGGER.error(f"Error in init project. Error occurred in peppy. Project id={p[0]}") + + return result_list def get_projects_in_list( self, From e0b52d0fa52a3b90bedad27ea008d87ed544585d Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 30 Aug 2022 19:17:15 -0400 Subject: [PATCH 27/40] Fixed #33 --- pepdbagent/pepdbagent.py | 57 +++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index d3c6312..5140289 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -79,9 +79,9 @@ def close_connection(self) -> None: def upload_project( self, project: peppy.Project, - namespace: str = DEFAULT_NAMESPACE, + namespace: str = None, name: str = None, - tag: str = DEFAULT_TAG, + tag: str = None, status: str = None, description: str = None, anno: dict = None, @@ -100,15 +100,20 @@ def upload_project( """ cursor = self.postgresConnection.cursor() try: + if namespace is None: + namespace = DEFAULT_NAMESPACE + if tag is None: + tag = DEFAULT_TAG proj_dict = project.to_dict(extended=True) + + proj_digest = self._create_digest(proj_dict) + if name: proj_name = name else: proj_name = proj_dict["name"] - proj_digest = self._create_digest(proj_dict) - # creating annotation: proj_annot = Annotation().create_new_annotation( status=status, @@ -172,9 +177,9 @@ def upload_project( def update_project( self, project: peppy.Project, - namespace: str = DEFAULT_NAMESPACE, + namespace: str = None, name: str = None, - tag: str = DEFAULT_TAG, + tag: str = None, anno: dict = None, ) -> None: """ @@ -189,6 +194,12 @@ def update_project( cursor = self.postgresConnection.cursor() proj_dict = project.to_dict(extended=True) + + if namespace is None: + namespace = DEFAULT_NAMESPACE + if tag is None: + tag = DEFAULT_TAG + if name: proj_name = name else: @@ -255,18 +266,13 @@ def get_project_by_registry(self, registry_path: str = None): name = reg["item"] tag = reg["tag"] - if namespace is None: - namespace = DEFAULT_NAMESPACE - if tag is None: - tag = DEFAULT_TAG - return self.get_project(namespace=namespace, name=name, tag=tag) def get_project( self, - namespace: str = DEFAULT_NAMESPACE, + namespace: str = None, name: str = None, - tag: str = DEFAULT_TAG, + tag: str = None, ) -> Union[peppy.Project, None]: """ Retrieving project from database by specifying project registry_path, name, or digest @@ -275,6 +281,11 @@ def get_project( :param tag: tag of the project :return: peppy object with found project """ + if namespace is None: + namespace = DEFAULT_NAMESPACE + if tag is None: + tag = DEFAULT_TAG + sql_q = f""" select {ID_COL}, {PROJ_COL} from {DB_TABLE_NAME} """ @@ -649,9 +660,9 @@ def get_namespace_annotation(self, namespace: str = None) -> dict: def project_exists( self, - namespace: str = DEFAULT_NAMESPACE, + namespace: str = None, name: str = None, - tag: str = DEFAULT_TAG, + tag: str = None, ) -> bool: """ Checking if project exists in the database @@ -660,6 +671,12 @@ def project_exists( :param tag: project tag :return: Returning True if project exist """ + if namespace is None: + namespace = DEFAULT_NAMESPACE + + if tag is None: + tag = DEFAULT_TAG + if name is None: _LOGGER.error(f"Name is not specified") return False @@ -703,9 +720,9 @@ def project_exists_by_registry( def project_status( self, - namespace: str = DEFAULT_NAMESPACE, + namespace: str = None, name: str = None, - tag: str = DEFAULT_TAG, + tag: str = None, ) -> Union[str, None]: """ Retrieve project status by providing name, namespace and tag @@ -715,11 +732,15 @@ def project_status( :return: status """ sql_q = f""" - select ({ANNO_COL}->>'status') as status + select ({ANNO_COL}->>'{STATUS_KEY}') as status from {DB_TABLE_NAME} WHERE {NAMESPACE_COL}=%s AND {NAME_COL}=%s AND {TAG_COL}=%s; """ + if namespace is None: + namespace = DEFAULT_NAMESPACE + if tag is None: + tag = DEFAULT_TAG if not name: _LOGGER.error( From ba8cd57325b465097c4f109aaebb1a1a772ff7d1 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 31 Aug 2022 10:31:39 -0400 Subject: [PATCH 28/40] updated update function --- pepdbagent/pepannot.py | 8 ++---- pepdbagent/pepdbagent.py | 54 +++++++++++++++++++--------------------- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/pepdbagent/pepannot.py b/pepdbagent/pepannot.py index 77bcd04..311955b 100644 --- a/pepdbagent/pepannot.py +++ b/pepdbagent/pepannot.py @@ -1,4 +1,4 @@ -from const import ( +from .const import ( STATUS_KEY, DESCRIPTION_KEY, N_SAMPLES_KEY, @@ -14,7 +14,7 @@ class Annotation(dict): A class to model an annotations used in pep-db """ - def __init__(self, registry: str = None, annotation_dict: dict = None): + def __init__(self, annotation_dict: dict = None, registry: str = None): super(Annotation, self).__init__() if annotation_dict is None: @@ -158,7 +158,3 @@ def __str__(self): def __dict__(self): return dict(self) - -bbb = Annotation(registry="asdfasdf", annotation_dict={}) -print(bbb.get_json()) -print(bbb) diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index 5140289..fcf5b1b 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -155,7 +155,7 @@ def upload_project( name=proj_name, tag=tag, project=project, - anno=proj_annot, + anno=proj_annot.get_json(), ) else: _LOGGER.warning( @@ -175,12 +175,14 @@ def upload_project( cursor.close() def update_project( - self, - project: peppy.Project, - namespace: str = None, - name: str = None, - tag: str = None, - anno: dict = None, + self, + project: peppy.Project, + namespace: str = None, + name: str = None, + tag: str = None, + status: str = None, + description: str = None, + anno: dict = None, ) -> None: """ Upload project to the database @@ -188,42 +190,36 @@ def update_project( :param namespace: namespace of the project (Default: 'other') :param name: name of the project (Default: name is taken from the project object) :param tag: tag (or version) of the project + :param status: status of the project + :param description: description of the project :param anno: dict with annotations about current project - :param update: boolean value if project hase to be updated """ - cursor = self.postgresConnection.cursor() - proj_dict = project.to_dict(extended=True) + cursor = self.postgresConnection.cursor() if namespace is None: namespace = DEFAULT_NAMESPACE if tag is None: tag = DEFAULT_TAG + proj_dict = project.to_dict(extended=True) + + proj_digest = self._create_digest(proj_dict) + if name: proj_name = name else: proj_name = proj_dict["name"] - proj_digest = self._create_digest(proj_dict) + # creating annotation: + proj_annot = Annotation().create_new_annotation( + status=status, + description=description, + last_update=str(datetime.datetime.now()), + n_samples=len(project.samples), + anno_dict=anno, + ) - # adding project status to db: - if STATUS_KEY in anno: - proj_status = anno[STATUS_KEY] - del anno[STATUS_KEY] - else: - proj_status = DEFAULT_STATUS - - anno_info = { - "proj_description": proj_dict["description"], - "n_samples": len(project.samples), - "last_update": str(datetime.datetime.now()), - "status": proj_status, - } - - if anno: - anno_info.update(anno) - anno_info = json.dumps(anno_info) proj_dict = json.dumps(proj_dict) if self.project_exists(namespace=namespace, name=proj_name, tag=tag): @@ -237,7 +233,7 @@ def update_project( ( proj_digest, proj_dict, - anno_info, + proj_annot, namespace, proj_name, tag, From 32aba9313fb5df0c852f9260f17eb2b517f747e6 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 31 Aug 2022 11:10:46 -0400 Subject: [PATCH 29/40] updated readme --- docs/tutorial.md | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/docs/tutorial.md b/docs/tutorial.md index 910aa5a..797b097 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -1,11 +1,6 @@ # pepagent + pep_db -Database and PEPagent for storing and processing pep projects - ---- -## How to create pep_db: - -Complete instruction can be found here: [pep_db](pep_db) +pepdbagent package for processing and uploading retrieving pep projects using python --- ## Brief pephubdb tutorial: @@ -14,6 +9,7 @@ Complete instruction can be found here: [pep_db](pep_db) ```python from pepdbagent import Connection ``` + 1) Create connection with DB: ```python # 1) By providing credentials and connection information: @@ -27,15 +23,16 @@ projectDB = Connection("postgresql://postgres:docker@localhost:5432/pep-db") # initiate peppy Project pep_project = peppy.Project("/sample_pep/subtable3/project_config.yaml") # use upload_project function to add this project to the DB -projectDB.upload_project(pep_project, namespace = "Test", anno={"project": "annotation_dict"}) +projectDB.upload_project(pep_project, namespace = "Test", status = "approved", description = "ocean dream", anno={"additional": "annotation"}) # additionally you can specify name and tag of the project -# update project +# update project* projectDB.update_project(pep_project, namespace = "Test", anno={"enot": "annotation_dict"}) # additionally you can specify name and tag of the project ``` +* If you want to update project you should specify all annotation fields, otherwise they will be empty 3) Get list of projects in namespace: ```python @@ -100,14 +97,25 @@ print(pr_ob.samples) ``` -5) Get annotation about single project or projects: +5) Get annotation about single project: ```python # Get dictionary of annotation for 1 project by registry -projects_anno_list = projectDB.get_project_annotation(digest='Test/subtable3:this_is_tag') +projects_anno_list = projectDB.get_project_annotation_by_registry(registry='Test/subtable3:this_is_tag') # if tag is not set default tag will be set projects_anno_list = projectDB.get_project_annotation(namespace='Test/subtable3') + +# As a return value user will get `Annotation` class object. There is two options to retrieve data: +#1) Using object as simple dict: +projects_anno_list["status"] +#2) Using .key ; Available keys: +projects_anno_list.registry # to know what project annotation is it +projects_anno_list.status +projects_anno_list.description +projects_anno_list.last_update +projects_anno_list.n_samples + ``` 6) Get annotations namespace or all namespaces: @@ -148,7 +156,6 @@ projectDB.project_status_by_registry(registry_path='nn/buu/dog') ``` 9) Get registry paths of all the projects by digest: - ```python projectDB.get_registry_paths_by_digest(digest='sdafsgwerg243rt2gregw3qr24') ``` \ No newline at end of file From 2d0f3b8735b29c3c07e2f4e15ef09ad337107794 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 1 Sep 2022 12:25:51 -0400 Subject: [PATCH 30/40] annot fix --- pepdbagent/pepdbagent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index fcf5b1b..bbbed69 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -425,7 +425,7 @@ def get_namespace_info(self, namespace: str) -> dict: :return: A dictionary representation of the namespace in the database """ try: - sql_q = f"select {ID_COL}, {NAME_COL}, {TAG_COL}, {DIGEST_COL}, {ANNO_COL} from {DB_TABLE_NAME} where namespace = %s" + sql_q = f"select {ID_COL}, {NAME_COL}, {TAG_COL}, {DIGEST_COL}, {ANNO_COL} from {DB_TABLE_NAME} where {NAMESPACE_COL} = %s" results = self.run_sql_fetchall(sql_q, namespace) projects = [ { @@ -433,8 +433,8 @@ def get_namespace_info(self, namespace: str) -> dict: "name": p[1], "tag": p[2], "digest": p[3], - "description": p[4]["proj_description"], - "n_samples": p[4]["n_samples"], + "description": Annotation(p[4]).description, + "n_samples": Annotation(p[4]).n_samples, } for p in results ] From 3478eade8709f2f30bcba5074ff5a2e20d6e972c Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 1 Sep 2022 12:26:11 -0400 Subject: [PATCH 31/40] lint --- pepdbagent/pepannot.py | 1 - pepdbagent/pepdbagent.py | 24 ++++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/pepdbagent/pepannot.py b/pepdbagent/pepannot.py index 311955b..c08a51e 100644 --- a/pepdbagent/pepannot.py +++ b/pepdbagent/pepannot.py @@ -157,4 +157,3 @@ def __str__(self): def __dict__(self): return dict(self) - diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index bbbed69..cb98c75 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -175,14 +175,14 @@ def upload_project( cursor.close() def update_project( - self, - project: peppy.Project, - namespace: str = None, - name: str = None, - tag: str = None, - status: str = None, - description: str = None, - anno: dict = None, + self, + project: peppy.Project, + namespace: str = None, + name: str = None, + tag: str = None, + status: str = None, + description: str = None, + anno: dict = None, ) -> None: """ Upload project to the database @@ -304,7 +304,9 @@ def get_project( project_obj = peppy.Project().from_dict(project_value) return project_obj except Exception: - _LOGGER.error(f"Error in init project. Error occurred in peppy. Project id={found_prj[0]}") + _LOGGER.error( + f"Error in init project. Error occurred in peppy. Project id={found_prj[0]}" + ) return None else: _LOGGER.warning( @@ -353,7 +355,9 @@ def get_projects_in_namespace( try: result_list.append(peppy.Project().from_dict(p[1])) except Exception: - _LOGGER.error(f"Error in init project. Error occurred in peppy. Project id={p[0]}") + _LOGGER.error( + f"Error in init project. Error occurred in peppy. Project id={p[0]}" + ) return result_list From d081592032f4d9043f416e8f001e3000eb8d675c Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 2 Sep 2022 12:51:38 -0400 Subject: [PATCH 32/40] Fixed #35 --- docs/tutorial.md | 1 + pepdbagent/pepannot.py | 22 +++++++++++++++++++--- pepdbagent/pepdbagent.py | 4 ++-- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/docs/tutorial.md b/docs/tutorial.md index 797b097..44371ce 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -21,6 +21,7 @@ projectDB = Connection("postgresql://postgres:docker@localhost:5432/pep-db") 2) Add new project to the DB ```python # initiate peppy Project +import peppy pep_project = peppy.Project("/sample_pep/subtable3/project_config.yaml") # use upload_project function to add this project to the DB projectDB.upload_project(pep_project, namespace = "Test", status = "approved", description = "ocean dream", anno={"additional": "annotation"}) diff --git a/pepdbagent/pepannot.py b/pepdbagent/pepannot.py index c08a51e..ed57b8d 100644 --- a/pepdbagent/pepannot.py +++ b/pepdbagent/pepannot.py @@ -7,6 +7,15 @@ DEFAULT_STATUS, ) import json +import logmuse +import coloredlogs + +_LOGGER = logmuse.init_logger("pepannot") +coloredlogs.install( + logger=_LOGGER, + datefmt="%H:%M:%S", + fmt="[%(levelname)s] [%(asctime)s] %(message)s", +) class Annotation(dict): @@ -75,9 +84,16 @@ def create_new_annotation( if description: new_dict[DESCRIPTION_KEY] = description if anno_dict: - for dict_key in anno_dict.keys(): - new_dict[dict_key] = anno_dict[dict_key] - + try: + if not isinstance(anno_dict, dict): + assert TypeError + for dict_key in anno_dict.keys(): + new_dict[dict_key] = anno_dict[dict_key] + except TypeError: + _LOGGER.error("You have provided incorrect annotation dictionary type. " + "It's not a dict") + except AttributeError: + _LOGGER.error("Incorrect annotation dictionary type. Continuing..") return Annotation(annotation_dict=new_dict) def _property_setter(self, annot_dict: dict): diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index cb98c75..bcfaa16 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -155,7 +155,7 @@ def upload_project( name=proj_name, tag=tag, project=project, - anno=proj_annot.get_json(), + anno=dict(proj_annot), ) else: _LOGGER.warning( @@ -233,7 +233,7 @@ def update_project( ( proj_digest, proj_dict, - proj_annot, + proj_annot.get_json(), namespace, proj_name, tag, From df8b9867454b3d6651bddad40b2119531f50bbe0 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 2 Sep 2022 12:52:40 -0400 Subject: [PATCH 33/40] lint --- pepdbagent/pepannot.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pepdbagent/pepannot.py b/pepdbagent/pepannot.py index ed57b8d..124fa1b 100644 --- a/pepdbagent/pepannot.py +++ b/pepdbagent/pepannot.py @@ -90,8 +90,10 @@ def create_new_annotation( for dict_key in anno_dict.keys(): new_dict[dict_key] = anno_dict[dict_key] except TypeError: - _LOGGER.error("You have provided incorrect annotation dictionary type. " - "It's not a dict") + _LOGGER.error( + "You have provided incorrect annotation dictionary type. " + "It's not a dict" + ) except AttributeError: _LOGGER.error("Incorrect annotation dictionary type. Continuing..") return Annotation(annotation_dict=new_dict) From f0edf908b37d460c95291c5018b66e797e865aaf Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Sep 2022 14:15:43 -0400 Subject: [PATCH 34/40] skipped tests --- tests/test_pepagent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 9128995..77e6a90 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -2,9 +2,9 @@ import pytest import peppy from pepdbagent import Connection -from dotenv import load_dotenv +#from dotenv import load_dotenv -load_dotenv() +#load_dotenv() EXAMPLE_NAMESPACES = ["nfcore", "geo", "demo"] @@ -14,7 +14,7 @@ "nfcore/demo_rna_pep:default", ] - +@pytest.mark.skip(reason="no way of currently testing this") class TestDatafetching: db = Connection( From 745519f692768f39753cb8fdd37426d3b7baba0d Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Sep 2022 14:16:51 -0400 Subject: [PATCH 35/40] lint --- tests/test_pepagent.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 77e6a90..1f07916 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -2,9 +2,10 @@ import pytest import peppy from pepdbagent import Connection -#from dotenv import load_dotenv -#load_dotenv() +# from dotenv import load_dotenv + +# load_dotenv() EXAMPLE_NAMESPACES = ["nfcore", "geo", "demo"] @@ -14,6 +15,7 @@ "nfcore/demo_rna_pep:default", ] + @pytest.mark.skip(reason="no way of currently testing this") class TestDatafetching: From c0169d7a69a5638c60015f3c3f08e98b18fca3a2 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Sep 2022 15:19:22 -0400 Subject: [PATCH 36/40] skip pytest2 --- tests/test_pepagent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 1f07916..9fa08f6 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -16,7 +16,7 @@ ] -@pytest.mark.skip(reason="no way of currently testing this") +@pytest.mark.skipif(True, reason="no way of currently testing this") class TestDatafetching: db = Connection( From 25f8f914958544cb69ff8fdb98d54a7bc2b7a76e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Sep 2022 15:28:33 -0400 Subject: [PATCH 37/40] skip pytest3 --- tests/test_pepagent.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 9fa08f6..9ceeef9 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -19,10 +19,10 @@ @pytest.mark.skipif(True, reason="no way of currently testing this") class TestDatafetching: - db = Connection( - user=os.environ.get("POSTGRES_USER") or "postgres", - password=os.environ.get("POSTGRES_PASSWORD") or "docker", - ) + # db = Connection( + # user=os.environ.get("POSTGRES_USER") or "postgres", + # password=os.environ.get("POSTGRES_PASSWORD") or "docker", + # ) def test_connection(self): assert isinstance(self.db, Connection) From 67405d5bbfaa1980dbcf5a3bc138a1bee9915dc8 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Sep 2022 15:33:01 -0400 Subject: [PATCH 38/40] Added pypi upload file --- .github/workflows/python-publish.yml | 31 ++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/workflows/python-publish.yml diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..4e1ef42 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,31 @@ +# This workflows will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Upload Python Package + +on: + release: + types: [created] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* From a6439228c31ee6aa76680d6d905b5850f6ac0265 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Sep 2022 15:48:57 -0400 Subject: [PATCH 39/40] license + setup --- LICENSE.txt | 9 +++++++++ MANIFEST.in | 3 +++ setup.py | 24 ++++++++++++------------ 3 files changed, 24 insertions(+), 12 deletions(-) create mode 100644 LICENSE.txt create mode 100644 MANIFEST.in diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..1b78bad --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,9 @@ +Copyright 2017 Nathan Sheffield + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..fd948e0 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include LICENSE.txt +include requirements/* +include README.md diff --git a/setup.py b/setup.py index ad5aea8..5afaa73 100644 --- a/setup.py +++ b/setup.py @@ -17,17 +17,17 @@ extra = {"install_requires": DEPENDENCIES} # Additional files to include with package -# def get_static(name, condition=None): -# static = [ -# os.path.join(name, f) -# for f in os.listdir( -# os.path.join(os.path.dirname(os.path.realpath(__file__)), name) -# ) -# ] -# if condition is None: -# return static -# else: -# return [i for i in filter(lambda x: eval(condition), static)] +def get_static(name, condition=None): + static = [ + os.path.join(name, f) + for f in os.listdir( + os.path.join(os.path.dirname(os.path.realpath(__file__)), name) + ) + ] + if condition is None: + return static + else: + return [i for i in filter(lambda x: eval(condition), static)] with open(f"{PACKAGE_NAME}/_version.py", "r") as versionfile: @@ -54,7 +54,7 @@ keywords="project, metadata, bioinformatics, database", url="https://github.com/pepkit/pepdbagent/", author="Oleksandr Khoroshevskyi", - # license="", + license="BSD2", include_package_data=True, # tests_require=(["pytest"]), setup_requires=( From f6e95ebb584bc738761dea68e627d5c96412c8cd Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Sep 2022 15:52:34 -0400 Subject: [PATCH 40/40] version --- pepdbagent/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index 607f7a4..3dc1f76 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 +1 @@ -__version__ = "0.1.0-dev" +__version__ = "0.1.0"