Skip to content

Commit

Permalink
Merge pull request #131 from pepkit/dev
Browse files Browse the repository at this point in the history
Release 0.8.0
  • Loading branch information
khoroshevskyi authored Feb 26, 2024
2 parents 895eb86 + 020b6e3 commit 723e2d2
Show file tree
Hide file tree
Showing 10 changed files with 288 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cli-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:

- run: smokeshow upload htmlcov
env:
SMOKESHOW_GITHUB_STATUS_DESCRIPTION: CLI Coverage {coverage-percentage}
SMOKESHOW_GITHUB_STATUS_DESCRIPTION: Coverage {coverage-percentage}
SMOKESHOW_GITHUB_COVERAGE_THRESHOLD: 50
SMOKESHOW_GITHUB_CONTEXT: coverage
SMOKESHOW_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Expand Down
17 changes: 13 additions & 4 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,35 @@

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

## [0.7.3] -- 2023-02-05
## [0.8.0] -- 2024-02-26
- Fixed forking schema
- Improved forking efficiency [#129](https://github.com/pepkit/pepdbagent/issues/129)
- Added uploading project from dict
- Added get_config, get_samples, get_subsamples methods to project module [#128](https://github.com/pepkit/pepdbagent/issues/128)
- Fixed error handling in views API [#130](https://github.com/pepkit/pepdbagent/issues/130)
- Added no_fail to views API


## [0.7.3] -- 2024-02-08
- Fixed POP update
- Improved error handling in views API
- Added stats method to Namespace module
- Updated docs
- Added coverage


## [0.7.2] -- 2023-02-02
## [0.7.2] -- 2024-02-02
- Fixed Unique Constraint in the Views
- Fixed update project pop method
- Fixed bug in duplicating samples


## [0.7.1] -- 2023-01-22
## [0.7.1] -- 2024-01-22
- Fixed bug in Stars annotation
- SQL efficiency improvements
- Added sort by date in starred projects

## [0.7.0] -- 2023-01-17
## [0.7.0] -- 2024-01-17
- Added `pop` to project table and annotation model [#107](https://github.com/pepkit/pepdbagent/issues/107)
- Added `forked_from` feature [#73](https://github.com/pepkit/pepdbagent/issues/73)
- Switched to pydantic2 [#105](https://github.com/pepkit/pepdbagent/issues/105)
Expand Down
2 changes: 1 addition & 1 deletion pepdbagent/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.7.3"
__version__ = "0.8.0"
5 changes: 5 additions & 0 deletions pepdbagent/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,8 @@ class ViewAlreadyExistsError(PEPDatabaseAgentError):

def __init__(self, msg=""):
super().__init__(f"""View already in the project. {msg}""")


class NamespaceNotFoundError(PEPDatabaseAgentError):
    """
    Raised when a requested namespace cannot be found in the database.
    """

    def __init__(self, msg=""):
        """
        :param msg: optional detail appended to the base error message
        """
        # The base message must name the namespace: this exception reports a
        # missing namespace, not a missing project.
        super().__init__(f"""Namespace does not exist. {msg}""")
13 changes: 13 additions & 0 deletions pepdbagent/models.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,23 @@
# file with pydantic models
from typing import List, Optional, Union, Dict
from pydantic import BaseModel, Field, ConfigDict, field_validator
from peppy.const import CONFIG_KEY, SUBSAMPLE_RAW_LIST_KEY, SAMPLE_RAW_DICT_KEY

from pepdbagent.const import DEFAULT_TAG


class ProjectDict(BaseModel):
    """
    Raw project dictionary model.

    Each field is exposed under its peppy key alias; the model can also be
    populated by field name, and any extra key is rejected.
    """

    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    # Project configuration section.
    config: dict = Field(alias=CONFIG_KEY)
    # Subsample tables; the value may be None.
    subsample_list: Optional[list] = Field(alias=SUBSAMPLE_RAW_LIST_KEY)
    # Sample records.
    sample_dict: list = Field(alias=SAMPLE_RAW_DICT_KEY)


class AnnotationModel(BaseModel):
"""
Project Annotation model. All meta metadata
Expand Down
8 changes: 6 additions & 2 deletions pepdbagent/modules/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from sqlalchemy.orm import Session

from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET, PKG_NAME, DEFAULT_LIMIT_INFO
from pepdbagent.exceptions import NamespaceNotFoundError
from pepdbagent.db_utils import Projects, BaseEngine
from pepdbagent.models import (
Namespace,
Expand Down Expand Up @@ -221,9 +222,9 @@ def stats(self, namespace: str = None, monthly: bool = False) -> NamespaceStats:
:param monthly: if True, get statistics for the last 3 years monthly, else for the last 3 months daily.
"""
if monthly:
number_of_month = 3
else:
number_of_month = 12 * 3
else:
number_of_month = 3
today_date = datetime.today().date() + timedelta(days=1)
three_month_ago = today_date - timedelta(days=number_of_month * 30 + 1)
statement_last_update = select(Projects.last_update_date).filter(
Expand All @@ -240,6 +241,9 @@ def stats(self, namespace: str = None, monthly: bool = False) -> NamespaceStats:
update_results = session.execute(statement_last_update).all()
create_results = session.execute(statement_create_date).all()

if not update_results:
raise NamespaceNotFoundError(f"Namespace {namespace} not found in the database")

if monthly:
year_month_str_submission = [
dt.submission_date.strftime("%Y-%m") for dt in create_results
Expand Down
120 changes: 113 additions & 7 deletions pepdbagent/modules/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
from sqlalchemy.exc import IntegrityError, NoResultFound
from sqlalchemy.orm import Session
from sqlalchemy import Select
import numpy as np

from peppy.const import (
SAMPLE_RAW_DICT_KEY,
SUBSAMPLE_RAW_LIST_KEY,
CONFIG_KEY,
SAMPLE_TABLE_INDEX_KEY,
SAMPLE_NAME_ATTR,
)

from pepdbagent.const import (
Expand All @@ -24,8 +26,12 @@
)

from pepdbagent.db_utils import Projects, Samples, Subsamples, BaseEngine
from pepdbagent.exceptions import ProjectNotFoundError, ProjectUniqueNameError
from pepdbagent.models import UpdateItems, UpdateModel
from pepdbagent.exceptions import (
ProjectNotFoundError,
ProjectUniqueNameError,
PEPDatabaseAgentError,
)
from pepdbagent.models import UpdateItems, UpdateModel, ProjectDict
from pepdbagent.utils import create_digest, registry_path_converter


Expand Down Expand Up @@ -210,7 +216,7 @@ def delete_by_rp(

def create(
self,
project: peppy.Project,
project: Union[peppy.Project, dict],
namespace: str,
name: str = None,
tag: str = DEFAULT_TAG,
Expand All @@ -227,6 +233,13 @@ def create(
update is set True)
:param peppy.Project project: Project object that has to be uploaded to the DB
danger zone:
optionally, project can be a dictionary with PEP elements
({
_config: dict,
_sample_dict: Union[list, dict],
_subsample_list: list
})
:param namespace: namespace of the project (Default: 'other')
:param name: name of the project (Default: name is taken from the project object)
:param tag: tag (or version) of the project.
Expand All @@ -239,9 +252,22 @@ def create(
:param description: description of the project
:return: None
"""
proj_dict = project.to_dict(extended=True, orient="records")
if isinstance(project, peppy.Project):
proj_dict = project.to_dict(extended=True, orient="records")
elif isinstance(project, dict):
# verify if the dictionary has all necessary elements.
# samples should be always presented as list of dicts (orient="records"))
_LOGGER.warning(
f"Project f{namespace}/{name}:{tag} is provided as dictionary. Project won't be validated."
)
proj_dict = ProjectDict(**project).model_dump(by_alias=True)
else:
raise PEPDatabaseAgentError(
"Project has to be peppy.Project object or dictionary with PEP elements"
)

if not description:
description = project.description
description = project.get(description, "")
proj_dict[CONFIG_KEY][DESCRIPTION_KEY] = description

namespace = namespace.lower()
Expand All @@ -255,7 +281,10 @@ def create(
proj_dict[CONFIG_KEY][NAME_KEY] = proj_name

proj_digest = create_digest(proj_dict)
number_of_samples = len(project.samples)
try:
number_of_samples = len(project.samples)
except AttributeError:
number_of_samples = len(proj_dict[SAMPLE_RAW_DICT_KEY])

if update_only:
_LOGGER.info(f"Update_only argument is set True. Updating project {proj_name} ...")
Expand Down Expand Up @@ -293,7 +322,9 @@ def create(
self._add_samples_to_project(
new_prj,
proj_dict[SAMPLE_RAW_DICT_KEY],
sample_table_index=project.sample_table_index,
sample_table_index=proj_dict[CONFIG_KEY].get(
SAMPLE_TABLE_INDEX_KEY, SAMPLE_NAME_ATTR
),
)

if proj_dict[SUBSAMPLE_RAW_LIST_KEY]:
Expand Down Expand Up @@ -833,6 +864,7 @@ def fork(
namespace=original_namespace,
name=original_name,
tag=original_tag,
raw=True,
),
namespace=fork_namespace,
name=fork_name,
Expand All @@ -857,6 +889,80 @@ def fork(
fork_prj.forked_from_id = original_prj.id
fork_prj.pop = original_prj.pop
fork_prj.submission_date = original_prj.submission_date
fork_prj.pep_schema = original_prj.pep_schema
fork_prj.description = description or original_prj.description

session.commit()
return None

def get_config(self, namespace: str, name: str, tag: str) -> Union[dict, None]:
    """
    Fetch the stored configuration of a single project.

    :param namespace: project namespace
    :param name: project name
    :param tag: project tag
    :return: project configuration, or None when no matching row is found
    """
    project_filter = and_(
        Projects.namespace == namespace,
        Projects.name == name,
        Projects.tag == tag,
    )
    query = select(Projects.config).where(project_filter)

    with Session(self._sa_engine) as session:
        row = session.execute(query).one_or_none()

    # Only the config column is selected, so it is the first item of the row.
    return row[0] if row else None

def get_subsamples(self, namespace: str, name: str, tag: str) -> Union[list, None]:
    """
    Fetch the subsamples of a single project, grouped by subsample number.

    :param namespace: project namespace
    :param name: project name
    :param tag: project tag
    :return: list of subsample groups (one inner list per subsample number);
        empty list when the project has no subsamples
    :raises ProjectNotFoundError: when no project matches the supplied input
    """
    query = self._create_select_statement(name, namespace, tag)

    with Session(self._sa_engine) as session:
        project_row = session.scalar(query)

        if not project_row:
            raise ProjectNotFoundError(
                f"No project found for supplied input: '{namespace}/{name}:{tag}'. "
                f"Did you supply a valid namespace and project?"
            )

        _LOGGER.info(f"Project has been found: {project_row.namespace}, {project_row.name}")

        # Collect subsample records under their subsample number, keeping
        # first-seen order of the numbers.
        grouped = {}
        for entry in project_row.subsamples_mapping or []:
            grouped.setdefault(entry.subsample_number, []).append(entry.subsample)
        return list(grouped.values())

def get_samples(self, namespace: str, name: str, tag: str, raw: bool = True) -> list:
    """
    Fetch the samples of a single project.

    :param namespace: project namespace
    :param name: project name
    :param tag: project tag
    :param raw: if True, return the samples stored in the raw PEP dict;
        otherwise return the project's sample table as records. [Default: True]
    :return: list with project samples
    """
    if not raw:
        processed_project = self.get(namespace=namespace, name=name, tag=tag, raw=False)
        # Swap NaN cells for None before converting the table to records.
        cleaned_table = processed_project.sample_table.replace({np.nan: None})
        return cleaned_table.to_dict(orient="records")

    raw_project = self.get(namespace=namespace, name=name, tag=tag, raw=True)
    return raw_project.get(SAMPLE_RAW_DICT_KEY)
10 changes: 8 additions & 2 deletions pepdbagent/modules/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def create(
view_name: str,
view_dict: Union[dict, CreateViewDictModel],
description: str = None,
no_fail: bool = False,
) -> None:
"""
Create a view of the project in the database.
Expand All @@ -151,6 +152,7 @@ def create(
sample_list: List[str] # list of sample names
}
:param description: description of the view
:param no_fail: if True, skip samples that don't exist in the project
:return: None
"""
_LOGGER.debug(f"Creating view {view_name} with provided info: (view_dict: {view_dict})")
Expand Down Expand Up @@ -185,11 +187,15 @@ def create(
Samples.sample_name == sample_name,
)
)
sample_id = sa_session.execute(sample_statement).one()[0]
if not sample_id:
sample_id_tuple = sa_session.execute(sample_statement).one_or_none()
if sample_id_tuple:
sample_id = sample_id_tuple[0]
elif not sample_id_tuple and not no_fail:
raise SampleNotFoundError(
f"Sample {view_dict.project_namespace}/{view_dict.project_name}:{view_dict.project_tag}:{sample_name} does not exist"
)
else:
continue

sa_session.add(ViewSampleAssociation(sample_id=sample_id, view=view))

Expand Down
1 change: 1 addition & 0 deletions requirements/requirements-all.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ coloredlogs>=15.0.1
pytest-mock
pydantic>=2.0
psycopg>=3.1.15
numpy>=1.24.4
Loading

0 comments on commit 723e2d2

Please sign in to comment.