diff --git a/docs/changelog.md b/docs/changelog.md index c12749c..2daa71f 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.6.0] -- 2023-08-24 +- Added date filter to project annotation ## [0.5.5] -- 2023-07-19 - Updated requirements diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index 86716a7..906d362 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 +1 @@ -__version__ = "0.5.5" +__version__ = "0.6.0" diff --git a/pepdbagent/const.py b/pepdbagent/const.py index 68c8a4f..6fff364 100644 --- a/pepdbagent/const.py +++ b/pepdbagent/const.py @@ -15,3 +15,6 @@ POSTGRES_DIALECT = "postgresql" DEFAULT_LIMIT_INFO = 5 + +SUBMISSION_DATE_KEY = "submission_date" +LAST_UPDATE_DATE_KEY = "last_update_date" diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py index f63b98f..cd094f6 100644 --- a/pepdbagent/exceptions.py +++ b/pepdbagent/exceptions.py @@ -26,3 +26,13 @@ def __init__(self, msg=""): class ProjectUniqueNameError(PEPDatabaseAgentError): def __init__(self, msg=""): super().__init__(f"""{msg}""") + + +class IncorrectDateFormat(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""Incorrect date format was provided. {msg}""") + + +class FilterError(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""pepdbagent filter error. 
{msg}""") diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index 9319ed8..3b7873c 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -1,16 +1,23 @@ import logging -from typing import List, Union +from datetime import datetime +from typing import List, Literal, Optional, Union -from sqlalchemy import Engine, func, select -from sqlalchemy import and_, or_ +from sqlalchemy import Engine, and_, func, or_, select from sqlalchemy.exc import IntegrityError from sqlalchemy.sql.selectable import Select -from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET, DEFAULT_TAG, PKG_NAME -from pepdbagent.db_utils import Projects, BaseEngine -from pepdbagent.exceptions import ProjectNotFoundError, RegistryPathError +from pepdbagent.const import ( + DEFAULT_LIMIT, + DEFAULT_OFFSET, + DEFAULT_TAG, + PKG_NAME, + SUBMISSION_DATE_KEY, + LAST_UPDATE_DATE_KEY, +) +from pepdbagent.db_utils import BaseEngine, Projects +from pepdbagent.exceptions import FilterError, ProjectNotFoundError, RegistryPathError from pepdbagent.models import AnnotationList, AnnotationModel -from pepdbagent.utils import registry_path_converter, tuple_converter +from pepdbagent.utils import convert_date_string_to_date, registry_path_converter, tuple_converter _LOGGER = logging.getLogger(PKG_NAME) @@ -40,6 +47,9 @@ def get( offset: int = DEFAULT_OFFSET, order_by: str = "update_date", order_desc: bool = False, + filter_by: Optional[Literal["submission_date", "last_update_date"]] = None, + filter_start_date: Optional[str] = None, + filter_end_date: Optional[str] = None, ) -> AnnotationList: """ Get project annotations. @@ -62,7 +72,12 @@ def get( Options: ["name", "update_date", "submission_date"] [Default: update_date] :param order_desc: Sort the records in descending order. [Default: False] - :return: pydantic model: AnnotationReturnModel + :param filter_by: data to use filter on. 
+ Options: ["submission_date", "last_update_date"] + [Default: filter won't be used] + :param filter_start_date: Filter start date. Format: "YYYY/MM/DD" + :param filter_end_date: Filter end date. Format: "YYYY/MM/DD". if None: present date will be used + :return: pydantic model: AnnotationList """ if all([namespace, name, tag]): found_annotation = [ @@ -82,7 +97,14 @@ def get( return AnnotationList( limit=limit, offset=offset, - count=self._count_projects(namespace=namespace, search_str=query, admin=admin), + count=self._count_projects( + namespace=namespace, + search_str=query, + admin=admin, + filter_by=filter_by, + filter_end_date=filter_end_date, + filter_start_date=filter_start_date, + ), results=self._get_projects( namespace=namespace, search_str=query, @@ -91,6 +113,9 @@ def get( limit=limit, order_by=order_by, order_desc=order_desc, + filter_by=filter_by, + filter_end_date=filter_end_date, + filter_start_date=filter_start_date, ), ) @@ -200,12 +225,20 @@ def _count_projects( namespace: str = None, search_str: str = None, admin: Union[str, List[str]] = None, + filter_by: Optional[Literal["submission_date", "last_update_date"]] = None, + filter_start_date: Optional[str] = None, + filter_end_date: Optional[str] = None, ) -> int: """ Count projects. [This function is related to _find_projects] :param namespace: namespace where to search for a project :param search_str: search string. will be searched in name, tag and description information :param admin: string or list of admins [e.g. "Khoroshevskyi", or ["doc_adin","Khoroshevskyi"]] + :param filter_by: data to use filter on. + Options: ["submission_date", "last_update_date"] + [Default: filter won't be used] + :param filter_start_date: Filter start date. Format: "YYYY/MM/DD" + :param filter_end_date: Filter end date. Format: "YYYY/MM/DD". 
+ if None: present date will be used :return: number of found project in specified namespace """ if admin is None: @@ -217,6 +250,9 @@ def _count_projects( search_str=search_str, admin_list=admin, ) + statement = self._add_date_filter_if_provided( + statement, filter_by, filter_start_date, filter_end_date + ) result = self._pep_db_engine.session_execute(statement).first() try: @@ -233,6 +269,9 @@ def _get_projects( offset: int = DEFAULT_OFFSET, order_by: str = "update_date", order_desc: bool = False, + filter_by: Optional[Literal["submission_date", "last_update_date"]] = None, + filter_start_date: Optional[str] = None, + filter_end_date: Optional[str] = None, ) -> List[AnnotationModel]: """ Get projects by providing search string. @@ -246,6 +285,11 @@ def _get_projects( Options: ["name", "update_date", "submission_date"] [Default: "update_date"] :param order_desc: Sort the records in descending order. [Default: False] + :param filter_by: data to use filter on. + Options: ["submission_date", "last_update_date"] + [Default: filter won't be used] + :param filter_start_date: Filter start date. Format: "YYYY/MM/DD" + :param filter_end_date: Filter end date. Format: "YYYY/MM/DD". if None: present date will be used :return: list of found projects with their annotations. 
""" _LOGGER.info(f"Running annotation search: (namespace: {namespace}, query: {search_str}.") @@ -271,6 +315,9 @@ def _get_projects( search_str=search_str, admin_list=admin, ) + statement = self._add_date_filter_if_provided( + statement, filter_by, filter_start_date, filter_end_date + ) statement = self._add_order_by_keyword(statement, by=order_by, desc=order_desc) statement = statement.limit(limit).offset(offset) @@ -312,7 +359,7 @@ def _add_order_by_keyword( order_by_obj = Projects.last_update_date elif by == "name": order_by_obj = Projects.name - elif by == "submission_date": + elif by == SUBMISSION_DATE_KEY: order_by_obj = Projects.submission_date else: _LOGGER.warning( @@ -362,6 +409,45 @@ def _add_condition( return statement + @staticmethod + def _add_date_filter_if_provided( + statement: Select, + filter_by: Optional[Literal["submission_date", "last_update_date"]], + filter_start_date: Optional[str], + filter_end_date: Optional[str] = None, + ): + """ + Add filter to where clause to sqlalchemy statement (in project search) + + :param statement: sqlalchemy representation of a SELECT statement with where clause + :param filter_by: data to use filter on. + Options: ["submission_date", "last_update_date"] + :param filter_start_date: Filter start date. Format: "YYYY/MM/DD" + :param filter_end_date: Filter end date. Format: "YYYY/MM/DD". 
+ if None: present date will be used + :return: sqlalchemy representation of a SELECT statement with where clause with added filter + """ + if filter_by and filter_start_date: + start_date = convert_date_string_to_date(filter_start_date) + if filter_end_date: + end_date = convert_date_string_to_date(filter_end_date) + else: + end_date = datetime.now() + if filter_by == SUBMISSION_DATE_KEY: + statement = statement.filter( + Projects.submission_date.between(start_date, end_date) + ) + elif filter_by == LAST_UPDATE_DATE_KEY: + statement = statement.filter( + Projects.last_update_date.between(start_date, end_date) + ) + else: + raise FilterError("Invalid filter_by was provided!") + return statement + else: + if filter_by: + _LOGGER.warning(f"filter_start_date was not provided, skipping filter...") + return statement + def get_project_number_in_namespace( self, namespace: str, diff --git a/pepdbagent/utils.py b/pepdbagent/utils.py index 36eaa41..dc11bcc 100644 --- a/pepdbagent/utils.py +++ b/pepdbagent/utils.py @@ -1,3 +1,4 @@ +import datetime import json from collections.abc import Iterable from hashlib import md5 @@ -6,7 +7,7 @@ import ubiquerg from peppy.const import SAMPLE_RAW_DICT_KEY -from .exceptions import RegistryPathError +from .exceptions import IncorrectDateFormat, RegistryPathError def is_valid_registry_path(rpath: str) -> bool: @@ -93,3 +94,13 @@ def tuple_converter(value: Union[tuple, list, str, None]) -> tuple: return tuple( " ", ) + + +def convert_date_string_to_date(date_string: str) -> datetime.datetime: + """ + Convert string into datetime format (the parsed date is shifted forward by one day) + + :param date_string: date string in format [YYYY/MM/DD]. e.g. 
2022/02/22 + :return: datetime format + """ + return datetime.datetime.strptime(date_string, "%Y/%m/%d") + datetime.timedelta(days=1) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 918570e..0b8fbeb 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,6 +1,6 @@ sqlalchemy>=2.0.0 logmuse -peppy>=0.35.7 +peppy>=0.40.0a4 ubiquerg>=0.6.2 coloredlogs>=15.0.1 pytest-mock diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 518e08e..7a5e84a 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -1,8 +1,10 @@ -import pytest -import peppy +import datetime import os -from pepdbagent.exceptions import ProjectNotFoundError +import peppy +import pytest + +from pepdbagent.exceptions import FilterError, ProjectNotFoundError DNS = f"postgresql://postgres:docker@localhost:5432/pep-db" @@ -397,6 +399,67 @@ def test_all_annotations_are_returned(self, initiate_pepdb_con, namespace, name) "pep_schema", } + @pytest.mark.parametrize( + "namespace, query, found_number", + [ + ["namespace1", "ame", 2], + [None, "re", 3], + ], + ) + def test_search_filter_success(self, initiate_pepdb_con, namespace, query, found_number): + date_now = datetime.datetime.now() + date_old = datetime.datetime.now() - datetime.timedelta(days=5) + result = initiate_pepdb_con.annotation.get( + namespace=namespace, + query=query, + admin="private_test", + filter_by="submission_date", + filter_start_date=date_old.strftime("%Y/%m/%d"), + filter_end_date=date_now.strftime("%Y/%m/%d"), + ) + assert len(result.results) == found_number + + @pytest.mark.parametrize( + "namespace, query, found_number", + [ + ["namespace1", "ame", 0], + [None, "re", 0], + ], + ) + def test_search_filter_zero_prj(self, initiate_pepdb_con, namespace, query, found_number): + date_now = datetime.datetime.now() - datetime.timedelta(days=2) + date_old = date_now - datetime.timedelta(days=2) + result = initiate_pepdb_con.annotation.get( + 
namespace=namespace, + query=query, + admin="private_test", + filter_by="submission_date", + filter_start_date=date_old.strftime("%Y/%m/%d"), + filter_end_date=date_now.strftime("%Y/%m/%d"), + ) + assert len(result.results) == found_number + + @pytest.mark.parametrize( + "namespace, query, found_number", + [ + ["namespace1", "ame", 2], + ], + ) + def test_search_incorrect_filter_by_string( + self, initiate_pepdb_con, namespace, query, found_number + ): + date_now = datetime.datetime.now() - datetime.timedelta(days=2) + date_old = date_now - datetime.timedelta(days=2) + with pytest.raises(FilterError): + result = initiate_pepdb_con.annotation.get( + namespace=namespace, + query=query, + admin="private_test", + filter_by="incorrect", + filter_start_date=date_old.strftime("%Y/%m/%d"), + filter_end_date=date_now.strftime("%Y/%m/%d"), + ) + class TestNamespace: """