Skip to content

Commit

Permalink
Merge pull request #59 from databio/metadata
Browse files Browse the repository at this point in the history
Added metadata and gen valid tables
  • Loading branch information
khoroshevskyi authored Sep 20, 2024
2 parents cdb952a + 94e60d0 commit a4a3024
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 4 deletions.
41 changes: 40 additions & 1 deletion bbconf/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,16 @@
from typing import List, Optional

import pandas as pd
from sqlalchemy import TIMESTAMP, BigInteger, ForeignKey, Result, Select, event, select
from sqlalchemy import (
TIMESTAMP,
BigInteger,
ForeignKey,
Result,
Select,
event,
select,
UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import JSON
from sqlalchemy.engine import URL, Engine, create_engine
from sqlalchemy.event import listens_for
Expand Down Expand Up @@ -118,6 +127,10 @@ class Bed(Base):
)
license_mapping: Mapped["License"] = relationship("License", back_populates="bed")

ref_classifier: Mapped["GenomeRefStats"] = relationship(
"GenomeRefStats", back_populates="bed", cascade="all, delete-orphan"
)


class BedMetadata(Base):
__tablename__ = "bed_metadata"
Expand Down Expand Up @@ -351,6 +364,32 @@ class License(Base):
bed: Mapped[List["Bed"]] = relationship("Bed", back_populates="license_mapping")


class GenomeRefStats(Base):
    """
    Reference genome validation statistics of a bed file.

    Each row holds the result of comparing one bed file (``bed_id``) with one
    reference genome (``compared_genome``); that pair is unique in the table.
    """

    __tablename__ = "genome_ref_stats"

    id: Mapped[int] = mapped_column(primary_key=True, index=True)

    # ondelete="CASCADE": the database removes these rows together with the
    # parent bed record.
    bed_id: Mapped[str] = mapped_column(
        ForeignKey("bed.id", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    provided_genome: Mapped[str]
    compared_genome: Mapped[str] = mapped_column(
        nullable=False, comment="Compared Genome"
    )

    # These columns are nullable and default to None, so they are annotated as
    # Optional to keep the Python-side type in line with the schema.
    xs: Mapped[Optional[float]] = mapped_column(nullable=True, default=None)
    oobr: Mapped[Optional[float]] = mapped_column(nullable=True, default=None)
    sequence_fit: Mapped[Optional[float]] = mapped_column(nullable=True, default=None)
    assigned_points: Mapped[int] = mapped_column(nullable=False)
    tier_ranking: Mapped[int] = mapped_column(nullable=False)

    # NOTE(review): the unique constraint below allows several rows per bed,
    # while Bed.ref_classifier is annotated as a scalar Mapped["GenomeRefStats"]
    # -- confirm whether that side should be a list.
    bed: Mapped["Bed"] = relationship("Bed", back_populates="ref_classifier")

    # One result per (bed file, compared genome) pair.
    __table_args__ = (UniqueConstraint("bed_id", "compared_genome"),)


@listens_for(Universes, "after_insert")
@listens_for(Universes, "after_update")
def add_bed_universe(mapper, connection, target):
Expand Down
12 changes: 12 additions & 0 deletions bbconf/models/bed_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,15 @@ class TokenizedPathResponse(BaseModel):
universe_id: str
file_path: str
endpoint_url: str


class RefGenValidModel(BaseModel):
    # Reference genome validation result for one provided/compared genome pair.
    # Unknown fields are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    # Genomes taking part in the comparison.
    provided_genome: str
    compared_genome: str

    # Statistics; `oobr` and `sequence_fit` may be absent (None).
    xs: float = 0.0
    oobr: Union[float, None] = None
    sequence_fit: Union[float, None] = None

    # Required scoring fields.
    assigned_points: int
    tier_ranking: int
24 changes: 23 additions & 1 deletion bbconf/modules/bedfiles.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from logging import getLogger
from typing import Dict, Union
from pydantic import BaseModel

import numpy as np
from geniml.bbclient import BBClient
Expand All @@ -14,7 +15,15 @@

from bbconf.config_parser.bedbaseconfig import BedBaseConfig
from bbconf.const import DEFAULT_LICENSE, PKG_NAME, ZARR_TOKENIZED_FOLDER
from bbconf.db_utils import Bed, BedStats, Files, TokenizedBed, Universes, BedMetadata
from bbconf.db_utils import (
Bed,
BedStats,
Files,
TokenizedBed,
Universes,
BedMetadata,
GenomeRefStats,
)
from bbconf.exceptions import (
BedBaseConfError,
BedFIleExistsError,
Expand Down Expand Up @@ -43,6 +52,7 @@
TokenizedPathResponse,
UniverseMetadata,
StandardMeta,
RefGenValidModel,
)

_LOGGER = getLogger(PKG_NAME)
Expand Down Expand Up @@ -393,6 +403,7 @@ def add(
plots: dict = None,
files: dict = None,
classification: dict = None,
ref_validation: Dict[str, BaseModel] = None,
license_id: str = DEFAULT_LICENSE,
upload_qdrant: bool = False,
upload_pephub: bool = False,
Expand All @@ -410,6 +421,7 @@ def add(
:param plots: bed file plots
:param files: bed file files
:param classification: bed file classification
:param ref_validation: reference genome validation data: mapping of compared genome name
to a model with the validation statistics (stored via RefGenValidModel)
:param license_id: bed file license id (default: 'DUO:0000042'). Full list of licenses:
https://raw.githubusercontent.com/EBISPOT/DUO/master/duo.csv
:param upload_qdrant: add bed file to qdrant indexes
Expand Down Expand Up @@ -532,6 +544,16 @@ def add(
session.add(new_bedstat)
session.add(new_metadata)

# `ref_validation` defaults to None; skip the loop when no reference
# validation data was provided instead of failing on `None.items()`.
if ref_validation:
    for ref_gen_check, data in ref_validation.items():
        # The dict key is the compared genome; the provided genome comes from
        # the classification. RefGenValidModel validates the combined record
        # before it is turned into a database row.
        # NOTE(review): `classification` is annotated as dict in the signature
        # but read by attribute here -- presumably converted to a model earlier
        # in this function; confirm.
        new_gen_ref = GenomeRefStats(
            **RefGenValidModel(
                **data.model_dump(),
                provided_genome=classification.genome_alias,
                compared_genome=ref_gen_check,
            ).model_dump(),
            bed_id=identifier,
        )
        session.add(new_gen_ref)
session.commit()

return None
Expand Down
6 changes: 6 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

# [0.7.0] - 2024-09-20
## Added
- Table and methods for reference genome validator
- Table with standard metadata schema
- Bed file opening improvements

# [0.6.1] - 2024-08-21
## Added

Expand Down
4 changes: 2 additions & 2 deletions requirements/requirements-all.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
yacman >= 0.9.1
sqlalchemy >= 2.0.0
geniml >= 0.4.0
geniml >= 0.4.1
psycopg >= 3.1.15
colorlogs
pydantic >= 2.6.4
botocore
boto3 >= 1.34.54
pephubclient >= 0.4.1
pephubclient >= 0.4.4
sqlalchemy_schemadisplay
zarr
pyyaml >= 6.0.1 # for s3fs because of the errors
Expand Down

0 comments on commit a4a3024

Please sign in to comment.