From e3e713997f7b4150534e7011936179629c3a5139 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 22 Oct 2024 17:53:28 +0200 Subject: [PATCH] Clean up (#195) 1. Remove old CLI "apps" that aren't used anymore 2. Remove pydantic 1 testing 3. Add contribution guide for sources 4. Don't output date if there's a data version available 5. Remove redundant testing from bioregistry --- .github/workflows/tests.yml | 9 +- benchmarking/resolver_benchmark.py | 62 ------ setup.cfg | 3 +- src/pyobo/apps/__init__.py | 3 - src/pyobo/apps/cli.py | 24 --- src/pyobo/apps/gilda/__init__.py | 3 - src/pyobo/apps/gilda/__main__.py | 8 - src/pyobo/apps/gilda/app.py | 48 ----- src/pyobo/apps/gilda/cli.py | 36 ---- src/pyobo/apps/gilda/templates/base.html | 33 ---- src/pyobo/apps/gilda/templates/home.html | 11 -- src/pyobo/apps/gilda/templates/matches.html | 32 --- src/pyobo/apps/mapper/__init__.py | 3 - src/pyobo/apps/mapper/__main__.py | 11 -- src/pyobo/apps/mapper/cli.py | 37 ---- src/pyobo/apps/mapper/mapper.py | 187 ------------------ src/pyobo/apps/mapper/templates/base.html | 35 ---- .../apps/mapper/templates/mapper_home.html | 64 ------ src/pyobo/cli/cli.py | 2 - src/pyobo/sources/README.md | 15 ++ src/pyobo/sources/agrovoc.py | 4 +- src/pyobo/sources/uniprot/uniprot.py | 1 + src/pyobo/struct/reference.py | 6 +- src/pyobo/struct/struct.py | 5 +- tests/test_get_miriam_url.py | 83 -------- tests/test_sources/test_meta.py | 38 ++++ tox.ini | 7 +- 27 files changed, 71 insertions(+), 699 deletions(-) delete mode 100644 benchmarking/resolver_benchmark.py delete mode 100644 src/pyobo/apps/__init__.py delete mode 100644 src/pyobo/apps/cli.py delete mode 100644 src/pyobo/apps/gilda/__init__.py delete mode 100644 src/pyobo/apps/gilda/__main__.py delete mode 100644 src/pyobo/apps/gilda/app.py delete mode 100644 src/pyobo/apps/gilda/cli.py delete mode 100644 src/pyobo/apps/gilda/templates/base.html delete mode 100644 src/pyobo/apps/gilda/templates/home.html delete mode 100644 src/pyobo/apps/gilda/templates/matches.html delete mode 100644 src/pyobo/apps/mapper/__init__.py delete mode 100644 src/pyobo/apps/mapper/__main__.py delete mode 100644 src/pyobo/apps/mapper/cli.py delete mode 100644 src/pyobo/apps/mapper/mapper.py delete mode 100644 src/pyobo/apps/mapper/templates/base.html delete mode 100644 src/pyobo/apps/mapper/templates/mapper_home.html create mode 100644 src/pyobo/sources/README.md delete mode 100644 tests/test_get_miriam_url.py create mode 100644 tests/test_sources/test_meta.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 25a2fe38..1c1e63dc 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,7 +36,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ "3.9", "3.12" ] + python-version: [ "3.12" ] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} @@ -46,7 +46,7 @@ jobs: - name: Install dependencies run: | sudo apt-get install graphviz - pip install tox + pip install tox tox-uv - name: Check RST conformity with doc8 run: tox -e doc8 - name: Check README.rst @@ -60,7 +60,6 @@ jobs: matrix: os: [ ubuntu-latest ] python-version: [ "3.9", "3.12" ] - pydantic: [ "pydantic1", "pydantic2" ] exclude: - os: windows-latest python-version: 3.9 @@ -73,7 +72,7 @@ jobs: - name: Install dependencies run: | sudo apt-get install graphviz - pip install tox + pip install tox tox-uv - name: Test with pytest run: - tox -e py-${{ matrix.pydantic }} + tox -e py diff --git a/benchmarking/resolver_benchmark.py b/benchmarking/resolver_benchmark.py deleted file mode 100644 index 11c8fef4..00000000 --- a/benchmarking/resolver_benchmark.py +++ /dev/null @@ -1,62 +0,0 @@ -""" -The purpose of this python file is to benchmark how many queries per second the resolver -can handle. Since one of our previous bottlenecks was 5 per second from PubChem, any -improvement is nice! -""" - -import time - -import click -import requests -from requests_toolbelt.threaded import pool -from tqdm import tqdm - -import pyobo - - -def _get_urls(prefix="doid", host="localhost", port=5000): - identifiers = pyobo.get_id_name_mapping(prefix) - return [f"http://{host}:{port}/resolve/{prefix}:{identifier}" for identifier in identifiers] - - -def benchmark_sync(prefix="doid", host="localhost", port=5000): - urls = _get_urls(prefix=prefix, host=host, port=port) - - start = time.time() - for url in tqdm(urls, desc=f"benchmarking {prefix}"): - res = requests.get(url) - res.raise_for_status() - elapsed = time.time() - start - avg_elapsed = len(urls) / elapsed - print(f"Made {len(urls)} requests in {elapsed:.2f} seconds. Avg = {avg_elapsed:.2f} requests/s") - - -def benchmark_async(prefix="doid", host="localhost", port=5000): - urls = _get_urls(prefix=prefix, host=host, port=port) - - p = pool.Pool.from_urls(urls) - - start = time.time() - - # The following code is a modified version of ``p.join_all()`` with tqdm - for session_thread in tqdm(p._pool, desc=f"benchmarking async {prefix}"): - session_thread.join() - - elapsed = time.time() - start - avg_elapsed = len(urls) / elapsed - print( - f"Made {len(urls)} async requests in {elapsed:.2f} seconds. Avg = {avg_elapsed:.2f} requests/s" - ) - - -@click.command() -@click.option("--prefix", default="doid") -@click.option("--host", default="localhost") -@click.option("--port", default="5000") -def main(prefix, host, port): - benchmark_sync(prefix=prefix, host=host, port=port) - benchmark_async(prefix=prefix, host=host, port=port) - - -if __name__ == "__main__": - main() diff --git a/setup.cfg b/setup.cfg index 091edfcf..47731e40 100644 --- a/setup.cfg +++ b/setup.cfg @@ -59,12 +59,13 @@ install_requires = tabulate cachier pystow>=0.2.7 - bioversions>=0.5.202 + bioversions>=0.5.514 bioregistry>=0.10.20 bioontologies>=0.4.0 zenodo-client>=0.0.5 class_resolver psycopg2-binary + pydantic>=2.0 # Resource Downloaders drugbank_downloader chembl_downloader diff --git a/src/pyobo/apps/__init__.py b/src/pyobo/apps/__init__.py deleted file mode 100644 index 7c3a496b..00000000 --- a/src/pyobo/apps/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# -*- coding: utf-8 -*- - -"""A collection of web apps built on PyOBO.""" diff --git a/src/pyobo/apps/cli.py b/src/pyobo/apps/cli.py deleted file mode 100644 index f2c2edbd..00000000 --- a/src/pyobo/apps/cli.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - -"""CLI for PyOBO apps.""" - -import click - -from .gilda.cli import main as gilda_main -from .mapper.cli import main as mapper_main - -__all__ = [ - "main", -] - - -@click.group(name="apps") -def main(): - """Apps.""" - - -main.add_command(gilda_main) -main.add_command(mapper_main) - -if __name__ == "__main__": - main() diff --git a/src/pyobo/apps/gilda/__init__.py b/src/pyobo/apps/gilda/__init__.py deleted file mode 100644 index 81a446be..00000000 --- a/src/pyobo/apps/gilda/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# -*- coding: utf-8 -*- - -"""PyOBO's Gilda Service.""" diff --git a/src/pyobo/apps/gilda/__main__.py b/src/pyobo/apps/gilda/__main__.py deleted file mode 100644 index 60656c17..00000000 --- a/src/pyobo/apps/gilda/__main__.py +++ /dev/null @@ -1,8 +0,0 @@ -# -*- coding: utf-8 -*- - -"""CLI for PyOBO's Gilda Service.""" - -from .cli import main - -if __name__ == "__main__": - main() diff --git a/src/pyobo/apps/gilda/app.py b/src/pyobo/apps/gilda/app.py deleted file mode 100644 index 7326af42..00000000 --- a/src/pyobo/apps/gilda/app.py +++ /dev/null @@ -1,48 +0,0 @@ -# -*- coding: utf-8 -*- - -"""PyOBO's Gilda Service.""" - -from typing import Iterable, Union - -import flask -from flask_bootstrap import Bootstrap -from flask_wtf import FlaskForm -from wtforms.fields import StringField, SubmitField -from wtforms.validators import DataRequired - -from pyobo.gilda_utils import get_grounder - - -class Form(FlaskForm): - """Form for submitting a query.""" - - text = StringField("Text", validators=[DataRequired()]) - submit = SubmitField() - - def make_response(self): - """Make a response with the text.""" - return flask.redirect(flask.url_for("ground", text=self.text.data)) - - -def get_app(prefix: Union[str, Iterable[str]]): - """Make an app for grounding the text.""" - grounder = get_grounder(prefix) - - app = flask.Flask(__name__) - app.config["WTF_CSRF_ENABLED"] = False - Bootstrap(app) - - @app.route("/", methods=["GET", "POST"]) - def home(): - """Ground the given text.""" - form = Form() - if form.validate_on_submit(): - return form.make_response() - return flask.render_template("home.html", form=form) - - @app.route("/ground/") - def ground(text: str): - """Ground the given text.""" - return flask.jsonify([scored_match.to_json() for scored_match in grounder.ground(text)]) - - return app diff --git a/src/pyobo/apps/gilda/cli.py b/src/pyobo/apps/gilda/cli.py deleted file mode 100644 index 4130395d..00000000 --- a/src/pyobo/apps/gilda/cli.py +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- - -"""CLI for PyOBO's Gilda Service.""" - -import click -from more_click import ( - host_option, - port_option, - run_app, - verbose_option, - with_gunicorn_option, - workers_option, -) - -__all__ = [ - "main", -] - - -@click.command(name="gilda") -@click.argument("prefix", nargs=-1) -@verbose_option -@host_option -@workers_option -@port_option -@with_gunicorn_option -def main(prefix: str, host: str, port: str, with_gunicorn: bool, workers: int): - """Run the Gilda service for this database.""" - from .app import get_app - - app = get_app(prefix) - run_app(app, host=host, port=port, with_gunicorn=with_gunicorn, workers=workers) - - -if __name__ == "__main__": - main() diff --git a/src/pyobo/apps/gilda/templates/base.html b/src/pyobo/apps/gilda/templates/base.html deleted file mode 100644 index 52a01f0c..00000000 --- a/src/pyobo/apps/gilda/templates/base.html +++ /dev/null @@ -1,33 +0,0 @@ - - - - {% block head %} - - - - - {% block styles %} - - {{ bootstrap.load_css() }} - {% endblock %} - - - - {% block title %}{% endblock %} - {% endblock %} - - -
-
-
- - {% block content %}{% endblock %} -
-
-
-{% block scripts %} - - {{ bootstrap.load_js() }} -{% endblock %} - - diff --git a/src/pyobo/apps/gilda/templates/home.html b/src/pyobo/apps/gilda/templates/home.html deleted file mode 100644 index b9dafbc2..00000000 --- a/src/pyobo/apps/gilda/templates/home.html +++ /dev/null @@ -1,11 +0,0 @@ -{% extends "base.html" %} - -{% block content %} -
Gilda
-
-

- This is the Gilda grounding service. -

- {{ wtf.quick_form(form, form_type='horizontal') }} -
-{% endblock %} diff --git a/src/pyobo/apps/gilda/templates/matches.html b/src/pyobo/apps/gilda/templates/matches.html deleted file mode 100644 index cd5ecb5c..00000000 --- a/src/pyobo/apps/gilda/templates/matches.html +++ /dev/null @@ -1,32 +0,0 @@ -{% extends "base.html" %} - -{% block content %} -
Gilda
-
-

- Results for {{ form.text.data }} - {% if form.context.data %} with context - {{ form.context.data }}{% endif %} -

-
- - - - - - - - - - - {% for match in matches %} - - - - - - - {% endfor %} - -
DatabaseIdentifierNameScore
{{ match['term']['db'] }}{{ match['term']['id'] }}{{ match['term']['entry_name'] }}{{ match['score'] | round(4) }}
-{% endblock %} diff --git a/src/pyobo/apps/mapper/__init__.py b/src/pyobo/apps/mapper/__init__.py deleted file mode 100644 index 89fb7885..00000000 --- a/src/pyobo/apps/mapper/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# -*- coding: utf-8 -*- - -"""PyOBO's Mapping Service.""" diff --git a/src/pyobo/apps/mapper/__main__.py b/src/pyobo/apps/mapper/__main__.py deleted file mode 100644 index c0f5adca..00000000 --- a/src/pyobo/apps/mapper/__main__.py +++ /dev/null @@ -1,11 +0,0 @@ -# -*- coding: utf-8 -*- - -"""CLI for PyOBO's Mapping Service. - -Run with ``python -m pyobo.apps.mapper`` -""" - -from .cli import main - -if __name__ == "__main__": - main() diff --git a/src/pyobo/apps/mapper/cli.py b/src/pyobo/apps/mapper/cli.py deleted file mode 100644 index 5a0b70f4..00000000 --- a/src/pyobo/apps/mapper/cli.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- coding: utf-8 -*- - -"""CLI for PyOBO's Mapping Service. - -Run with ``python -m pyobo.apps.mapper``. -""" - -import click -from more_click import ( - host_option, - port_option, - run_app, - verbose_option, - with_gunicorn_option, -) - -__all__ = [ - "main", -] - - -@click.command(name="mapper") -@click.option("-x", "--mappings-file") -@port_option -@host_option -@with_gunicorn_option -@verbose_option -def main(mappings_file, host: str, port: str, with_gunicorn: bool): - """Run the mappings app.""" - from .mapper import get_app - - app = get_app(mappings_file) - run_app(app=app, host=host, port=port, with_gunicorn=with_gunicorn) - - -if __name__ == "__main__": - main() diff --git a/src/pyobo/apps/mapper/mapper.py b/src/pyobo/apps/mapper/mapper.py deleted file mode 100644 index 17c5bb57..00000000 --- a/src/pyobo/apps/mapper/mapper.py +++ /dev/null @@ -1,187 +0,0 @@ -# -*- coding: utf-8 -*- - -"""PyOBO's Mapping Service. - -Run with ``python -m pyobo.apps.mapper``. -""" - -import logging -from functools import lru_cache -from typing import Any, Dict, Iterable, List, Mapping, Optional, Union - -import bioregistry -import pandas as pd -from flasgger import Swagger -from flask import ( - Blueprint, - Flask, - abort, - current_app, - jsonify, - render_template, - url_for, -) -from flask_bootstrap import VERSION_BOOTSTRAP, Bootstrap -from werkzeug.local import LocalProxy - -from pyobo import Canonicalizer -from pyobo.constants import PROVENANCE, SOURCE_PREFIX, TARGET_PREFIX -from pyobo.identifier_utils import normalize_curie -from pyobo.resource_utils import ensure_inspector_javert_df - -__all__ = [ - "get_app", -] - -logger = logging.getLogger(__name__) - -summary_df = LocalProxy(lambda: current_app.config["summary"]) -canonicalizer: Canonicalizer = LocalProxy(lambda: current_app.config["canonicalizer"]) - - -@lru_cache() -def _single_source_shortest_path(curie: str) -> Optional[Mapping[str, List[Mapping[str, str]]]]: - return canonicalizer.single_source_shortest_path(curie=curie) - - -@lru_cache() -def _all_shortest_paths(source_curie: str, target_curie: str) -> List[List[Mapping[str, str]]]: - return canonicalizer.all_shortest_paths(source_curie=source_curie, target_curie=target_curie) - - -#: The blueprint that gets added to the app -search_blueprint = Blueprint("search", __name__) - - -@search_blueprint.route("/") -def home(): - """Show the home page.""" - return render_template("mapper_home.html") - - -@search_blueprint.route("/mappings/") -def single_source_mappings(curie: str): - """Return all length xrefs from the given identifier.""" - if curie not in canonicalizer.graph: - return jsonify( - success=False, - query=dict(curie=curie), - message="could not find curie", - ) - return jsonify(_single_source_shortest_path(curie)) - - -@search_blueprint.route("/mappings//") -def all_mappings(source_curie: str, target_curie: str): - """Return all shortest paths of xrefs between the two identifiers.""" - if source_curie not in canonicalizer.graph: - return jsonify( - success=False, - query=dict(source_curie=source_curie, target_curie=target_curie), - message="could not find source curie", - ) - if target_curie not in canonicalizer.graph: - return jsonify( - success=False, - query=dict(source_curie=source_curie, target_curie=target_curie), - message="could not find target curie", - ) - - return jsonify(_all_shortest_paths(source_curie, target_curie)) - - -@search_blueprint.route("/mappings/summarize") -def summarize(): - """Summarize the mappings.""" - return summary_df.to_html(index=False) - - -@search_blueprint.route("/mappings/summarize_by/") -def summarize_one(prefix: str): - """Summarize the mappings.""" - norm_prefix = bioregistry.normalize_prefix(prefix) - if norm_prefix is None: - return abort(500, f"invalid prefix: {prefix}") - in_df = summary_df.loc[summary_df[TARGET_PREFIX] == norm_prefix, [SOURCE_PREFIX, "count"]] - out_df = summary_df.loc[summary_df[SOURCE_PREFIX] == norm_prefix, [TARGET_PREFIX, "count"]] - return f""" -

Incoming Mappings to {norm_prefix}

- {in_df.to_html(index=False)} -

Outgoing Mappings from {norm_prefix}

- {out_df.to_html(index=False)} - """ - - -@search_blueprint.route("/canonicalize/") -def canonicalize(curie: str): - """Return the best CURIE.""" - # TODO maybe normalize the curie first? - norm_prefix, norm_identifier = normalize_curie(curie) - if norm_prefix is None or norm_identifier is None: - return jsonify( - query=curie, - normalizable=False, - ) - - norm_curie = f"{norm_prefix}:{norm_identifier}" - - rv: Dict[str, Any] = dict(query=curie) - if norm_curie != curie: - rv["norm_curie"] = norm_curie - - if norm_curie not in canonicalizer.graph: - rv["found"] = False - else: - result_curie = canonicalizer.canonicalize(norm_curie) - rv.update( - found=True, - result=result_curie, - mappings=url_for( - f".{all_mappings.__name__}", - source_curie=norm_curie, - target_curie=result_curie, - ), - ) - - return jsonify(rv) - - -def get_app(paths: Union[None, str, Iterable[str]] = None) -> Flask: - """Build the Flask app.""" - app = Flask(__name__) - Swagger(app) - - logger.info("using bootstrap_flask %s", VERSION_BOOTSTRAP) - Bootstrap(app) - - if paths is None: - df = ensure_inspector_javert_df() - elif isinstance(paths, str): - df = pd.read_csv(paths, sep="\t", dtype=str) - else: - df = pd.concat(pd.read_csv(path, sep="\t", dtype=str) for path in paths) - - app.config["summary"] = summarize_xref_df(df) - app.config["summary_provenances"] = summarize_xref_provenances_df(df) - # TODO allow for specification of priorities in the canonicalizer - app.config["canonicalizer"] = Canonicalizer.from_df(df) - app.register_blueprint(search_blueprint) - return app - - -def summarize_xref_df(df: pd.DataFrame) -> pd.DataFrame: - """Get all meta-mappings.""" - return _summarize(df, [SOURCE_PREFIX, TARGET_PREFIX]) - - -def summarize_xref_provenances_df(df: pd.DataFrame) -> pd.DataFrame: - """Get all meta-mappings.""" - return _summarize(df, [SOURCE_PREFIX, TARGET_PREFIX, PROVENANCE]) - - -def _summarize(df: pd.DataFrame, columns) -> pd.DataFrame: - """Get all meta-mappings.""" - rv = df[columns].groupby(columns).size().reset_index() - rv.columns = [*columns, "count"] - rv.sort_values("count", inplace=True, ascending=False) - return rv diff --git a/src/pyobo/apps/mapper/templates/base.html b/src/pyobo/apps/mapper/templates/base.html deleted file mode 100644 index 72273005..00000000 --- a/src/pyobo/apps/mapper/templates/base.html +++ /dev/null @@ -1,35 +0,0 @@ - - - - {% block head %} - - - - - {% block styles %} - - {{ bootstrap.load_css() }} - {% endblock %} - - - - {% block title %}{% endblock %} - {% endblock %} - - -
-
-
-
- - {% block content %}{% endblock %} -
-
-
-
-{% block scripts %} - - {{ bootstrap.load_js() }} -{% endblock %} - - diff --git a/src/pyobo/apps/mapper/templates/mapper_home.html b/src/pyobo/apps/mapper/templates/mapper_home.html deleted file mode 100644 index e5c931f7..00000000 --- a/src/pyobo/apps/mapper/templates/mapper_home.html +++ /dev/null @@ -1,64 +0,0 @@ -{% extends "base.html" %} - -{% block content %} -
Inspector Javert's Mapper
-
-

- This service resolves CURIEs - to the best CURIE that's mapped to it. -

-

- A summary of all of the xrefs can be found here. - You can also look at a summary for a specific prefix like UMLS. -

- -
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
CURIEDescription
hgnc:6893✅ maps correct identifier to higher priority namespace (ncbigene)
ncbigene:4137✅ already priority namespace
DOID:00000❌ invalid identifier
NNN:00000❌ invalid prefix
wikidata:Q42❌ unmapped prefix
- -{% endblock %} diff --git a/src/pyobo/cli/cli.py b/src/pyobo/cli/cli.py index 6b518367..0f9b4455 100644 --- a/src/pyobo/cli/cli.py +++ b/src/pyobo/cli/cli.py @@ -15,7 +15,6 @@ from .aws import main as aws_main from .database import main as database_main from .lookup import lookup -from ..apps.cli import main as apps_main from ..constants import RAW_DIRECTORY from ..plugins import has_nomenclature_plugin, iter_nomenclature_plugins from ..registries import iter_cached_obo @@ -116,7 +115,6 @@ def remapping(file): main.add_command(lookup) -main.add_command(apps_main) main.add_command(aws_main) main.add_command(database_main) diff --git a/src/pyobo/sources/README.md b/src/pyobo/sources/README.md new file mode 100644 index 00000000..d01c5544 --- /dev/null +++ b/src/pyobo/sources/README.md @@ -0,0 +1,15 @@ +# Sources + +1. Create a new module in `pyobo.sources` named with the prefix for the resource you're ontologizing +2. Make sure your resource has a corresponding prefix in [the Bioregistry](https://github.com/biopragmatics/bioregistry) +3. Subclass the `pyobo.Obo` class to represent your resource +4. Add your resource to the list in `pyobo.sources.__init__` + +## What is in scope? + +1. Biomedical, semantic web, bibliographic, life sciences, and related natural sciences resources are welcome +2. The source you want to ontologize should be an identifier resource, i.e., it mints its own identifiers. If you want + to ontologize some database that reuses some other identifier resource's identifiers, then this isn't the right + place. +3. Resources that are not possible to download automatically are not in scope for PyOBO. Reproducibility and reusability + are core values of this software diff --git a/src/pyobo/sources/agrovoc.py b/src/pyobo/sources/agrovoc.py index 49d72ba0..b5d93f7f 100644 --- a/src/pyobo/sources/agrovoc.py +++ b/src/pyobo/sources/agrovoc.py @@ -11,6 +11,8 @@ "ensure_agrovoc_graph", ] +PREFIX = "agrovoc" + def ensure_agrovoc_graph(version: str) -> Graph: """Download and parse the given version of AGROVOC.""" @@ -20,5 +22,5 @@ def ensure_agrovoc_graph(version: str) -> Graph: graph.bind("skosxl", "http://www.w3.org/2008/05/skos-xl#") graph.bind("skos", SKOS) graph.bind("dcterms", DCTERMS) - graph.bind("agrovoc", "http://aims.fao.org/aos/agrontology#") + graph.bind(PREFIX, "http://aims.fao.org/aos/agrontology#") return graph diff --git a/src/pyobo/sources/uniprot/uniprot.py b/src/pyobo/sources/uniprot/uniprot.py index 6b1a639d..e1a5a808 100644 --- a/src/pyobo/sources/uniprot/uniprot.py +++ b/src/pyobo/sources/uniprot/uniprot.py @@ -57,6 +57,7 @@ class UniProtGetter(Obo): gene_product_of, molecularly_interacts_with, derives_from, + located_in, ] def iter_terms(self, force: bool = False) -> Iterable[Term]: diff --git a/src/pyobo/struct/reference.py b/src/pyobo/struct/reference.py index a357f85a..ab1b3cbc 100644 --- a/src/pyobo/struct/reference.py +++ b/src/pyobo/struct/reference.py @@ -7,7 +7,7 @@ import bioregistry import curies from curies.api import ExpansionError -from pydantic import Field, root_validator, validator +from pydantic import Field, field_validator, model_validator from .utils import obo_escape from ..identifier_utils import normalize_curie @@ -23,7 +23,7 @@ class Reference(curies.Reference): name: Optional[str] = Field(default=None, description="the name of the reference") - @validator("prefix") + @field_validator("prefix") def validate_prefix(cls, v): # noqa """Validate the prefix for this reference.""" norm_prefix = bioregistry.normalize_prefix(v) @@ -41,7 +41,7 @@ def preferred_curie(self) -> str: """Get the preferred curie for this reference.""" return f"{self.preferred_prefix}:{self.identifier}" - @root_validator(pre=True) + @model_validator(mode="before") def validate_identifier(cls, values): # noqa """Validate the identifier.""" prefix, identifier = values.get("prefix"), values.get("identifier") diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py index 9d17d205..72b401e1 100644 --- a/src/pyobo/struct/struct.py +++ b/src/pyobo/struct/struct.py @@ -661,13 +661,14 @@ def _iter_terms(self, use_tqdm: bool = False, desc: str = "terms") -> Iterable[T def iterate_obo_lines(self) -> Iterable[str]: """Iterate over the lines to write in an OBO file.""" yield f"format-version: {self.format_version}" - yield f"date: {self.date_formatted}" if self.auto_generated_by is not None: yield f"auto-generated-by: {self.auto_generated_by}" if self.data_version is not None: yield f"data-version: {self.data_version}" + else: + yield f"date: {self.date_formatted}" for prefix, url in sorted((self.idspaces or {}).items()): yield f"idspace: {prefix} {url}" @@ -1466,7 +1467,7 @@ def _convert_typedefs(typedefs: Optional[Iterable[TypeDef]]) -> List[Mapping[str def _convert_typedef(typedef: TypeDef) -> Mapping[str, Any]: """Convert a type def.""" # TODO add more later - return typedef.reference.dict() + return typedef.reference.model_dump() def _convert_synonym_typedefs(synonym_typedefs: Optional[Iterable[SynonymTypeDef]]) -> List[str]: diff --git a/tests/test_get_miriam_url.py b/tests/test_get_miriam_url.py deleted file mode 100644 index 57665d7f..00000000 --- a/tests/test_get_miriam_url.py +++ /dev/null @@ -1,83 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Tests for identifiers.org URL generation.""" - -import logging -import unittest - -import requests -from bioregistry import get_identifiers_org_iri - -logger = logging.getLogger(__name__) - -#: These resources don't seem to exist anymore -BLACKLIST = { - "abs", - "aftol.taxonomy", - "agricola", - "ecogene", - "euclinicaltrials", - "fsnp", - "gold", - "gold.genome", - "gold.meta", -} - -#: These resources will need special rules for resolving -UNSOLVED = { - "ark", - "did", - "gramene.growthstage", - "gwascentral.phenotype", - # TODO -} - - -class TestMiriam(unittest.TestCase): - """Test generating identifiers.org links.""" - - def test_successful(self): - """Test CURIEs that should work.""" - curies = [ - ("go", "0006915"), # name in LUI - ("doid", "11337"), # name in LUI - ("mesh", "C000100"), # namespace not in LUI - ] - - # curies = [] - # for entry in get_miriam(): - # prefix = entry['prefix'] - # if prefix <= 'gramene.growthstage': # TODO REMOVE THIS LINE - # continue # TODO REMOVE THIS LINE - # norm_prefix = normalize_prefix(prefix) - # self.assertIsNotNone(norm_prefix, msg=f'could not normalize MIRIAM prefix: {norm_prefix}') - # curies.append((prefix, norm_prefix, entry['sampleId'])) - - for prefix, identifier in curies: - if prefix in BLACKLIST or prefix in UNSOLVED: - continue - with self.subTest(prefix=prefix, msg=f"failed for MIRIAM prefix: {prefix}"): - url = get_identifiers_org_iri(prefix, identifier) - self.assertIsNotNone(url, msg=f"metaregistry does not contain prefix {prefix}") - try: - res = requests.get(url) - except ( - requests.exceptions.SSLError, - requests.exceptions.ConnectionError, - ): - logger.warning(f"identifiers.org has a problem resolving prefix {prefix}") - continue - self.assertFalse( - res.text.startswith("INVALID"), - msg=f"invalid url for {prefix}: {url}\n\n{res.text}", - ) - - def test_unsuccessful(self): - """Test links that should fail.""" - curies = [ - ("nope_nope_nope", "0006915"), - ] - for prefix, identifier in curies: - with self.subTest(prefix=prefix): - url = get_identifiers_org_iri(prefix, identifier) - self.assertIsNone(url) diff --git a/tests/test_sources/test_meta.py b/tests/test_sources/test_meta.py new file mode 100644 index 00000000..23625954 --- /dev/null +++ b/tests/test_sources/test_meta.py @@ -0,0 +1,38 @@ +"""Test sources.""" + +import importlib +import unittest +from pathlib import Path + +import pyobo.sources +from pyobo import Obo + +EXCEPTIONS = {"biogrid", "agrovoc", "go", "chebi"} + + +class TestSources(unittest.TestCase): + """Test sources.""" + + def test_complete(self): + """Test all files are imported in `__init__.py`.""" + directory = Path(pyobo.sources.__file__).parent.resolve() + for path in directory.iterdir(): + if ( + path.stem in {"utils", "__init__", "__pycache__", "README"} + or path.stem.endswith("_utils") + or path.stem.endswith("_constants") + or path.stem in EXCEPTIONS + ): + continue + with self.subTest(module=path.stem): + module = importlib.import_module(f"pyobo.sources.{path.stem}") + getters = [ + y + for k in module.__dir__() + if isinstance(y := getattr(module, k), type) + and issubclass(y, Obo) + and y is not Obo + ] + self.assertNotEqual( + 0, len(getters), msg=f"forgot to create Obo subclass in {module.__name__}" + ) diff --git a/tox.ini b/tox.ini index 82dbda3b..2b21a3d7 100644 --- a/tox.ini +++ b/tox.ini @@ -19,16 +19,13 @@ envlist = readme docs # the actual tests - py-pydantic1 - py-pydantic2 + py + py # always keep coverage-report last # coverage-report [testenv] commands = coverage run -p -m pytest --durations=20 {posargs:tests} -deps = - pydantic1: pydantic<2.0 - pydantic2: pydantic>=2.0 extras = tests