diff --git a/docs/changelog.md b/docs/changelog.md index c65ccb3..0645aae 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,9 +2,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.7.2] -- 2023-02-XX +## [0.7.2] -- 2023-02-02 - Fixed Unique Constraint in the Views - Fixed update project pop method +- Fixed bug in duplicating samples ## [0.7.1] -- 2023-01-22 diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py index 462301e..9f81d65 100644 --- a/pepdbagent/exceptions.py +++ b/pepdbagent/exceptions.py @@ -61,6 +61,11 @@ def __init__(self, msg=""): super().__init__(f"""Sample does not exist. {msg}""") +class SampleAlreadyExistsError(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""Sample already exists. {msg}""") + + class ViewNotFoundError(PEPDatabaseAgentError): def __init__(self, msg=""): super().__init__(f"""View does not exist. {msg}""") @@ -73,3 +78,12 @@ class SampleAlreadyInView(PEPDatabaseAgentError): def __init__(self, msg=""): super().__init__(f"""Sample is already in the view. {msg}""") + + +class ViewAlreadyExistsError(PEPDatabaseAgentError): + """ + View is already in the project exception + """ + + def __init__(self, msg=""): + super().__init__(f"""View already in the project. {msg}""") diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index d02f30b..1319b5a 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -510,6 +510,17 @@ def update( else: raise ProjectNotFoundError("No items will be updated!") + @staticmethod + def _find_duplicates(sample_name_list: List[str]) -> List[str]: + seen = set() + duplicates = set() + for name in sample_name_list: + if name in seen: + duplicates.add(name) + else: + seen.add(name) + return list(duplicates) + def _update_samples( self, namespace: str, @@ -527,6 +538,8 @@ def _update_samples( :param sample_name_key: key of the sample name :return: None """ + # TODO: This function is not ideal and is really slow. We should brainstorm this implementation + new_sample_names = [sample[sample_name_key] for sample in samples_list] with Session(self._sa_engine) as session: project = session.scalar( @@ -537,38 +550,98 @@ def _update_samples( ) ) old_sample_names = [sample.sample_name for sample in project.samples_mapping] + + # delete samples that are not in the new list + sample_names_copy = new_sample_names.copy() for old_sample in old_sample_names: - if old_sample not in new_sample_names: - session.execute( - delete(Samples).where( + if old_sample not in sample_names_copy: + this_sample = session.scalars( + select(Samples).where( and_( Samples.sample_name == old_sample, Samples.project_id == project.id ) ) ) + delete_samples_list = [k for k in this_sample] + session.delete(delete_samples_list[-1]) + else: + sample_names_copy.remove(old_sample) + # update or add samples order_number = 0 + added_sample_list = [] for new_sample in samples_list: order_number += 1 - if new_sample[sample_name_key] not in old_sample_names: - project.samples_mapping.append( - Samples( - sample=new_sample, - sample_name=new_sample[sample_name_key], - row_number=order_number, + + if new_sample[sample_name_key] not in added_sample_list: + added_sample_list.append(new_sample[sample_name_key]) + + if new_sample[sample_name_key] not in old_sample_names: + project.samples_mapping.append( + Samples( + sample=new_sample, + sample_name=new_sample[sample_name_key], + row_number=order_number, + ) ) - ) + else: + sample_mapping = session.scalar( + select(Samples).where( + and_( + Samples.sample_name == new_sample[sample_name_key], + Samples.project_id == project.id, + ) + ) + ) + sample_mapping.sample = new_sample + sample_mapping.row_number = order_number else: - sample_mapping = session.scalar( - select(Samples).where( - and_( - Samples.sample_name == new_sample[sample_name_key], - Samples.project_id == project.id, + # if sample_name is duplicated is sample table, find second sample and update or add it. + if new_sample[sample_name_key] in old_sample_names: + sample_mappings = session.scalars( + select(Samples).where( + and_( + Samples.sample_name == new_sample[sample_name_key], + Samples.project_id == project.id, + ) ) ) - ) - sample_mapping.sample = new_sample - sample_mapping.row_number = order_number + sample_mappings = [sample_mapping for sample_mapping in sample_mappings] + if len(sample_mappings) <= 1: + project.samples_mapping.append( + Samples( + sample=new_sample, + sample_name=new_sample[sample_name_key], + row_number=order_number, + ) + ) + else: + try: + sample_mapping = sample_mappings[ + added_sample_list.count(new_sample[sample_name_key]) + ] + sample_mapping.sample = new_sample + sample_mapping.row_number = order_number + + except Exception: + project.samples_mapping.append( + Samples( + sample=new_sample, + sample_name=new_sample[sample_name_key], + row_number=order_number, + ) + ) + added_sample_list.append(new_sample[sample_name_key]) + else: + project.samples_mapping.append( + Samples( + sample=new_sample, + sample_name=new_sample[sample_name_key], + row_number=order_number, + ) + ) + added_sample_list.append(new_sample[sample_name_key]) + session.commit() @staticmethod diff --git a/pepdbagent/modules/sample.py b/pepdbagent/modules/sample.py index cd54c5c..7154ebe 100644 --- a/pepdbagent/modules/sample.py +++ b/pepdbagent/modules/sample.py @@ -13,7 +13,7 @@ DEFAULT_TAG, PKG_NAME, ) -from pepdbagent.exceptions import SampleNotFoundError +from pepdbagent.exceptions import SampleNotFoundError, SampleAlreadyExistsError from pepdbagent.db_utils import BaseEngine, Samples, Projects @@ -241,7 +241,7 @@ def add( ) if sample_mapping and not overwrite: - raise ValueError( + raise SampleAlreadyExistsError( f"Sample {namespace}/{name}:{tag}?{sample_name} already exists in the database" ) elif sample_mapping and overwrite: diff --git a/pepdbagent/modules/view.py b/pepdbagent/modules/view.py index aa43f86..a43bddd 100644 --- a/pepdbagent/modules/view.py +++ b/pepdbagent/modules/view.py @@ -18,6 +18,7 @@ SampleAlreadyInView, ProjectNotFoundError, SampleNotFoundError, + ViewAlreadyExistsError, ) from pepdbagent.db_utils import BaseEngine, Samples, Projects, Views, ViewSampleAssociation @@ -188,7 +189,13 @@ def create( raise SampleNotFoundError( f"Sample {view_dict.project_namespace}/{view_dict.project_name}:{view_dict.project_tag}:{sample_name} does not exist" ) - sa_session.add(ViewSampleAssociation(sample_id=sample_id, view=view)) + try: + sa_session.add(ViewSampleAssociation(sample_id=sample_id, view=view)) + + except IntegrityError: + raise ViewAlreadyExistsError( + f"View {view_name} of the project {view_dict.project_namespace}/{view_dict.project_name}:{view_dict.project_tag} already exists" + ) sa_session.commit() @@ -334,10 +341,15 @@ def remove_sample( ViewSampleAssociation.view_id == view.id, ) ) - sa_session.execute(delete_statement) - sa_session.commit() + try: + sa_session.execute(delete_statement) + sa_session.commit() + except IntegrityError: + raise SampleNotFoundError( + f"Sample {namespace}/{name}:{tag}:{sample_name} does not exist in view {view_name}" + ) - return None + return None def get_snap_view( self, namespace: str, name: str, tag: str, sample_name_list: List[str], raw: bool = False diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 59a4a35..a7dedf6 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -324,7 +324,7 @@ def test_update_project_description( "namespace, name", [ ["namespace1", "amendments1"], - # ["namespace1", "amendments2"], + ["namespace3", "subtable1"], # ["namespace2", "derive"], # ["namespace2", "imply"], ], @@ -384,6 +384,43 @@ def test_update_project_private(self, initiate_pepdb_con, namespace, name): ) assert is_private is True + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "basic"], + ], + ) + def test_project_can_have_2_sample_names(self, initiate_pepdb_con, namespace, name): + """ + In PEP 2.1.0 project can have 2 rows with same sample name, + ensure that update works correctly + """ + new_prj = initiate_pepdb_con.project.get(namespace=namespace, name=name) + prj_dict = new_prj.to_dict(extended=True, orient="records") + + prj_dict["_sample_dict"].append( + {"file": "data/frog23_data.txt", "protocol": "anySample3Type", "sample_name": "frog_2"} + ) + prj_dict["_sample_dict"].append( + { + "file": "data/frog23_data.txt4", + "protocol": "anySample3Type4", + "sample_name": "frog_2", + } + ) + + new_prj.description = "new_description" + initiate_pepdb_con.project.update( + namespace=namespace, + name=name, + tag="default", + update_dict={"project": peppy.Project.from_dict(prj_dict)}, + ) + + prj = initiate_pepdb_con.project.get(namespace=namespace, name=name, raw=True) + + assert len(prj["_sample_dict"]) == 4 + @pytest.mark.skipif( not db_setup(),