From e3e713997f7b4150534e7011936179629c3a5139 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Tue, 22 Oct 2024 17:53:28 +0200
Subject: [PATCH] Clean up (#195)

1. Remove old CLI "apps" that aren't used anymore
2. Remove pydantic 1 testing
3. Add contribution guide for sources
4. Don't output date if there's a data version available
5. Remove redundant testing from bioregistry
---
 .github/workflows/tests.yml                   |   9 +-
 benchmarking/resolver_benchmark.py            |  62 ------
 setup.cfg                                     |   3 +-
 src/pyobo/apps/__init__.py                    |   3 -
 src/pyobo/apps/cli.py                         |  24 ---
 src/pyobo/apps/gilda/__init__.py              |   3 -
 src/pyobo/apps/gilda/__main__.py              |   8 -
 src/pyobo/apps/gilda/app.py                   |  48 -----
 src/pyobo/apps/gilda/cli.py                   |  36 ----
 src/pyobo/apps/gilda/templates/base.html      |  33 ----
 src/pyobo/apps/gilda/templates/home.html      |  11 --
 src/pyobo/apps/gilda/templates/matches.html   |  32 ---
 src/pyobo/apps/mapper/__init__.py             |   3 -
 src/pyobo/apps/mapper/__main__.py             |  11 --
 src/pyobo/apps/mapper/cli.py                  |  37 ----
 src/pyobo/apps/mapper/mapper.py               | 187 ------------------
 src/pyobo/apps/mapper/templates/base.html     |  35 ----
 .../apps/mapper/templates/mapper_home.html    |  64 ------
 src/pyobo/cli/cli.py                          |   2 -
 src/pyobo/sources/README.md                   |  15 ++
 src/pyobo/sources/agrovoc.py                  |   4 +-
 src/pyobo/sources/uniprot/uniprot.py          |   1 +
 src/pyobo/struct/reference.py                 |   6 +-
 src/pyobo/struct/struct.py                    |   5 +-
 tests/test_get_miriam_url.py                  |  83 --------
 tests/test_sources/test_meta.py               |  38 ++++
 tox.ini                                       |   7 +-
 27 files changed, 71 insertions(+), 699 deletions(-)
 delete mode 100644 benchmarking/resolver_benchmark.py
 delete mode 100644 src/pyobo/apps/__init__.py
 delete mode 100644 src/pyobo/apps/cli.py
 delete mode 100644 src/pyobo/apps/gilda/__init__.py
 delete mode 100644 src/pyobo/apps/gilda/__main__.py
 delete mode 100644 src/pyobo/apps/gilda/app.py
 delete mode 100644 src/pyobo/apps/gilda/cli.py
 delete mode 100644 src/pyobo/apps/gilda/templates/base.html
 delete mode 100644 src/pyobo/apps/gilda/templates/home.html
 delete mode 100644 src/pyobo/apps/gilda/templates/matches.html
 delete mode 100644 src/pyobo/apps/mapper/__init__.py
 delete mode 100644 src/pyobo/apps/mapper/__main__.py
 delete mode 100644 src/pyobo/apps/mapper/cli.py
 delete mode 100644 src/pyobo/apps/mapper/mapper.py
 delete mode 100644 src/pyobo/apps/mapper/templates/base.html
 delete mode 100644 src/pyobo/apps/mapper/templates/mapper_home.html
 create mode 100644 src/pyobo/sources/README.md
 delete mode 100644 tests/test_get_miriam_url.py
 create mode 100644 tests/test_sources/test_meta.py

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 25a2fe38..1c1e63dc 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -36,7 +36,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [ "3.9", "3.12" ]
+        python-version: [ "3.12" ]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
@@ -46,7 +46,7 @@ jobs:
       - name: Install dependencies
         run: |
           sudo apt-get install graphviz
-          pip install tox
+          pip install tox tox-uv
       - name: Check RST conformity with doc8
         run: tox -e doc8
       - name: Check README.rst
@@ -60,7 +60,6 @@ jobs:
       matrix:
         os: [ ubuntu-latest ]
         python-version: [ "3.9", "3.12" ]
-        pydantic: [ "pydantic1", "pydantic2" ]
         exclude:
           - os: windows-latest
             python-version: 3.9
@@ -73,7 +72,7 @@ jobs:
       - name: Install dependencies
         run: |
           sudo apt-get install graphviz
-          pip install tox
+          pip install tox tox-uv
       - name: Test with pytest
         run:
-          tox -e py-${{ matrix.pydantic }}
+          tox -e py
diff --git a/benchmarking/resolver_benchmark.py b/benchmarking/resolver_benchmark.py
deleted file mode 100644
index 11c8fef4..00000000
--- a/benchmarking/resolver_benchmark.py
+++ /dev/null
@@ -1,62 +0,0 @@
-"""
-The purpose of this python file is to benchmark how many queries per second the resolver
-can handle. Since one of our previous bottlenecks was 5 per second from PubChem, any
-improvement is nice!
-"""
-
-import time
-
-import click
-import requests
-from requests_toolbelt.threaded import pool
-from tqdm import tqdm
-
-import pyobo
-
-
-def _get_urls(prefix="doid", host="localhost", port=5000):
-    identifiers = pyobo.get_id_name_mapping(prefix)
-    return [f"http://{host}:{port}/resolve/{prefix}:{identifier}" for identifier in identifiers]
-
-
-def benchmark_sync(prefix="doid", host="localhost", port=5000):
-    urls = _get_urls(prefix=prefix, host=host, port=port)
-
-    start = time.time()
-    for url in tqdm(urls, desc=f"benchmarking {prefix}"):
-        res = requests.get(url)
-        res.raise_for_status()
-    elapsed = time.time() - start
-    avg_elapsed = len(urls) / elapsed
-    print(f"Made {len(urls)} requests in {elapsed:.2f} seconds. Avg = {avg_elapsed:.2f} requests/s")
-
-
-def benchmark_async(prefix="doid", host="localhost", port=5000):
-    urls = _get_urls(prefix=prefix, host=host, port=port)
-
-    p = pool.Pool.from_urls(urls)
-
-    start = time.time()
-
-    # The following code is a modified version of ``p.join_all()`` with tqdm
-    for session_thread in tqdm(p._pool, desc=f"benchmarking async {prefix}"):
-        session_thread.join()
-
-    elapsed = time.time() - start
-    avg_elapsed = len(urls) / elapsed
-    print(
-        f"Made {len(urls)} async requests in {elapsed:.2f} seconds. Avg = {avg_elapsed:.2f} requests/s"
-    )
-
-
-@click.command()
-@click.option("--prefix", default="doid")
-@click.option("--host", default="localhost")
-@click.option("--port", default="5000")
-def main(prefix, host, port):
-    benchmark_sync(prefix=prefix, host=host, port=port)
-    benchmark_async(prefix=prefix, host=host, port=port)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/setup.cfg b/setup.cfg
index 091edfcf..47731e40 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -59,12 +59,13 @@ install_requires =
     tabulate
     cachier
     pystow>=0.2.7
-    bioversions>=0.5.202
+    bioversions>=0.5.514
     bioregistry>=0.10.20
     bioontologies>=0.4.0
     zenodo-client>=0.0.5
     class_resolver
     psycopg2-binary
+    pydantic>=2.0
     # Resource Downloaders
     drugbank_downloader
     chembl_downloader
diff --git a/src/pyobo/apps/__init__.py b/src/pyobo/apps/__init__.py
deleted file mode 100644
index 7c3a496b..00000000
--- a/src/pyobo/apps/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""A collection of web apps built on PyOBO."""
diff --git a/src/pyobo/apps/cli.py b/src/pyobo/apps/cli.py
deleted file mode 100644
index f2c2edbd..00000000
--- a/src/pyobo/apps/cli.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""CLI for PyOBO apps."""
-
-import click
-
-from .gilda.cli import main as gilda_main
-from .mapper.cli import main as mapper_main
-
-__all__ = [
-    "main",
-]
-
-
-@click.group(name="apps")
-def main():
-    """Apps."""
-
-
-main.add_command(gilda_main)
-main.add_command(mapper_main)
-
-if __name__ == "__main__":
-    main()
diff --git a/src/pyobo/apps/gilda/__init__.py b/src/pyobo/apps/gilda/__init__.py
deleted file mode 100644
index 81a446be..00000000
--- a/src/pyobo/apps/gilda/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""PyOBO's Gilda Service."""
diff --git a/src/pyobo/apps/gilda/__main__.py b/src/pyobo/apps/gilda/__main__.py
deleted file mode 100644
index 60656c17..00000000
--- a/src/pyobo/apps/gilda/__main__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""CLI for PyOBO's Gilda Service."""
-
-from .cli import main
-
-if __name__ == "__main__":
-    main()
diff --git a/src/pyobo/apps/gilda/app.py b/src/pyobo/apps/gilda/app.py
deleted file mode 100644
index 7326af42..00000000
--- a/src/pyobo/apps/gilda/app.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""PyOBO's Gilda Service."""
-
-from typing import Iterable, Union
-
-import flask
-from flask_bootstrap import Bootstrap
-from flask_wtf import FlaskForm
-from wtforms.fields import StringField, SubmitField
-from wtforms.validators import DataRequired
-
-from pyobo.gilda_utils import get_grounder
-
-
-class Form(FlaskForm):
-    """Form for submitting a query."""
-
-    text = StringField("Text", validators=[DataRequired()])
-    submit = SubmitField()
-
-    def make_response(self):
-        """Make a response with the text."""
-        return flask.redirect(flask.url_for("ground", text=self.text.data))
-
-
-def get_app(prefix: Union[str, Iterable[str]]):
-    """Make an app for grounding the text."""
-    grounder = get_grounder(prefix)
-
-    app = flask.Flask(__name__)
-    app.config["WTF_CSRF_ENABLED"] = False
-    Bootstrap(app)
-
-    @app.route("/", methods=["GET", "POST"])
-    def home():
-        """Ground the given text."""
-        form = Form()
-        if form.validate_on_submit():
-            return form.make_response()
-        return flask.render_template("home.html", form=form)
-
-    @app.route("/ground/<text>")
-    def ground(text: str):
-        """Ground the given text."""
-        return flask.jsonify([scored_match.to_json() for scored_match in grounder.ground(text)])
-
-    return app
diff --git a/src/pyobo/apps/gilda/cli.py b/src/pyobo/apps/gilda/cli.py
deleted file mode 100644
index 4130395d..00000000
--- a/src/pyobo/apps/gilda/cli.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""CLI for PyOBO's Gilda Service."""
-
-import click
-from more_click import (
-    host_option,
-    port_option,
-    run_app,
-    verbose_option,
-    with_gunicorn_option,
-    workers_option,
-)
-
-__all__ = [
-    "main",
-]
-
-
-@click.command(name="gilda")
-@click.argument("prefix", nargs=-1)
-@verbose_option
-@host_option
-@workers_option
-@port_option
-@with_gunicorn_option
-def main(prefix: str, host: str, port: str, with_gunicorn: bool, workers: int):
-    """Run the Gilda service for this database."""
-    from .app import get_app
-
-    app = get_app(prefix)
-    run_app(app, host=host, port=port, with_gunicorn=with_gunicorn, workers=workers)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/src/pyobo/apps/gilda/templates/base.html b/src/pyobo/apps/gilda/templates/base.html
deleted file mode 100644
index 52a01f0c..00000000
--- a/src/pyobo/apps/gilda/templates/base.html
+++ /dev/null
@@ -1,33 +0,0 @@
-<!doctype html>
-<html lang="en">
-<head>
-    {% block head %}
-        <!-- Required meta tags -->
-        <meta charset="utf-8">
-        <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
-
-        {% block styles %}
-            <!-- Bootstrap CSS -->
-            {{ bootstrap.load_css() }}
-        {% endblock %}
-
-        <script src="https://kit.fontawesome.com/4c86883252.js" crossorigin="anonymous"></script>
-
-        <title>{% block title %}{% endblock %}</title>
-    {% endblock %}
-</head>
-<body>
-<div class="container" style="margin-top: 50px; margin-bottom: 50px">
-    <div class="row">
-        <div class="card">
-            <!-- Your page content -->
-            {% block content %}{% endblock %}
-        </div>
-    </div>
-</div>
-{% block scripts %}
-    <!-- Optional JavaScript -->
-    {{ bootstrap.load_js() }}
-{% endblock %}
-</body>
-</html>
diff --git a/src/pyobo/apps/gilda/templates/home.html b/src/pyobo/apps/gilda/templates/home.html
deleted file mode 100644
index b9dafbc2..00000000
--- a/src/pyobo/apps/gilda/templates/home.html
+++ /dev/null
@@ -1,11 +0,0 @@
-{% extends "base.html" %}
-
-{% block content %}
-    <h5 class="card-header">Gilda</h5>
-    <div class="card-body">
-        <p>
-            This is the Gilda grounding service.
-        </p>
-        {{ wtf.quick_form(form, form_type='horizontal') }}
-    </div>
-{% endblock %}
diff --git a/src/pyobo/apps/gilda/templates/matches.html b/src/pyobo/apps/gilda/templates/matches.html
deleted file mode 100644
index cd5ecb5c..00000000
--- a/src/pyobo/apps/gilda/templates/matches.html
+++ /dev/null
@@ -1,32 +0,0 @@
-{% extends "base.html" %}
-
-{% block content %}
-    <h5 class="card-header">Gilda</h5>
-    <div class="card-body">
-        <p>
-            Results for <code>{{ form.text.data }}</code>
-            {% if form.context.data %} with context
-                <i>{{ form.context.data }}</i>{% endif %}
-        </p>
-    </div>
-    <table class="table table-striped table-hover">
-        <thead>
-        <tr>
-            <th>Database</th>
-            <th>Identifier</th>
-            <th>Name</th>
-            <th>Score</th>
-        </tr>
-        </thead>
-        <tbody>
-        {% for match in matches %}
-            <tr>
-                <td>{{ match['term']['db'] }}</td>
-                <td>{{ match['term']['id'] }}</td>
-                <td>{{ match['term']['entry_name'] }}</td>
-                <td class="text-right">{{ match['score'] | round(4) }}</td>
-            </tr>
-        {% endfor %}
-        </tbody>
-    </table>
-{% endblock %}
diff --git a/src/pyobo/apps/mapper/__init__.py b/src/pyobo/apps/mapper/__init__.py
deleted file mode 100644
index 89fb7885..00000000
--- a/src/pyobo/apps/mapper/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""PyOBO's Mapping Service."""
diff --git a/src/pyobo/apps/mapper/__main__.py b/src/pyobo/apps/mapper/__main__.py
deleted file mode 100644
index c0f5adca..00000000
--- a/src/pyobo/apps/mapper/__main__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""CLI for PyOBO's Mapping Service.
-
-Run with ``python -m pyobo.apps.mapper``
-"""
-
-from .cli import main
-
-if __name__ == "__main__":
-    main()
diff --git a/src/pyobo/apps/mapper/cli.py b/src/pyobo/apps/mapper/cli.py
deleted file mode 100644
index 5a0b70f4..00000000
--- a/src/pyobo/apps/mapper/cli.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""CLI for PyOBO's Mapping Service.
-
-Run with ``python -m pyobo.apps.mapper``.
-"""
-
-import click
-from more_click import (
-    host_option,
-    port_option,
-    run_app,
-    verbose_option,
-    with_gunicorn_option,
-)
-
-__all__ = [
-    "main",
-]
-
-
-@click.command(name="mapper")
-@click.option("-x", "--mappings-file")
-@port_option
-@host_option
-@with_gunicorn_option
-@verbose_option
-def main(mappings_file, host: str, port: str, with_gunicorn: bool):
-    """Run the mappings app."""
-    from .mapper import get_app
-
-    app = get_app(mappings_file)
-    run_app(app=app, host=host, port=port, with_gunicorn=with_gunicorn)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/src/pyobo/apps/mapper/mapper.py b/src/pyobo/apps/mapper/mapper.py
deleted file mode 100644
index 17c5bb57..00000000
--- a/src/pyobo/apps/mapper/mapper.py
+++ /dev/null
@@ -1,187 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""PyOBO's Mapping Service.
-
-Run with ``python -m pyobo.apps.mapper``.
-"""
-
-import logging
-from functools import lru_cache
-from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
-
-import bioregistry
-import pandas as pd
-from flasgger import Swagger
-from flask import (
-    Blueprint,
-    Flask,
-    abort,
-    current_app,
-    jsonify,
-    render_template,
-    url_for,
-)
-from flask_bootstrap import VERSION_BOOTSTRAP, Bootstrap
-from werkzeug.local import LocalProxy
-
-from pyobo import Canonicalizer
-from pyobo.constants import PROVENANCE, SOURCE_PREFIX, TARGET_PREFIX
-from pyobo.identifier_utils import normalize_curie
-from pyobo.resource_utils import ensure_inspector_javert_df
-
-__all__ = [
-    "get_app",
-]
-
-logger = logging.getLogger(__name__)
-
-summary_df = LocalProxy(lambda: current_app.config["summary"])
-canonicalizer: Canonicalizer = LocalProxy(lambda: current_app.config["canonicalizer"])
-
-
-@lru_cache()
-def _single_source_shortest_path(curie: str) -> Optional[Mapping[str, List[Mapping[str, str]]]]:
-    return canonicalizer.single_source_shortest_path(curie=curie)
-
-
-@lru_cache()
-def _all_shortest_paths(source_curie: str, target_curie: str) -> List[List[Mapping[str, str]]]:
-    return canonicalizer.all_shortest_paths(source_curie=source_curie, target_curie=target_curie)
-
-
-#: The blueprint that gets added to the app
-search_blueprint = Blueprint("search", __name__)
-
-
-@search_blueprint.route("/")
-def home():
-    """Show the home page."""
-    return render_template("mapper_home.html")
-
-
-@search_blueprint.route("/mappings/<curie>")
-def single_source_mappings(curie: str):
-    """Return all length xrefs from the given identifier."""
-    if curie not in canonicalizer.graph:
-        return jsonify(
-            success=False,
-            query=dict(curie=curie),
-            message="could not find curie",
-        )
-    return jsonify(_single_source_shortest_path(curie))
-
-
-@search_blueprint.route("/mappings/<source_curie>/<target_curie>")
-def all_mappings(source_curie: str, target_curie: str):
-    """Return all shortest paths of xrefs between the two identifiers."""
-    if source_curie not in canonicalizer.graph:
-        return jsonify(
-            success=False,
-            query=dict(source_curie=source_curie, target_curie=target_curie),
-            message="could not find source curie",
-        )
-    if target_curie not in canonicalizer.graph:
-        return jsonify(
-            success=False,
-            query=dict(source_curie=source_curie, target_curie=target_curie),
-            message="could not find target curie",
-        )
-
-    return jsonify(_all_shortest_paths(source_curie, target_curie))
-
-
-@search_blueprint.route("/mappings/summarize")
-def summarize():
-    """Summarize the mappings."""
-    return summary_df.to_html(index=False)
-
-
-@search_blueprint.route("/mappings/summarize_by/<prefix>")
-def summarize_one(prefix: str):
-    """Summarize the mappings."""
-    norm_prefix = bioregistry.normalize_prefix(prefix)
-    if norm_prefix is None:
-        return abort(500, f"invalid prefix: {prefix}")
-    in_df = summary_df.loc[summary_df[TARGET_PREFIX] == norm_prefix, [SOURCE_PREFIX, "count"]]
-    out_df = summary_df.loc[summary_df[SOURCE_PREFIX] == norm_prefix, [TARGET_PREFIX, "count"]]
-    return f"""
-    <h1>Incoming Mappings to {norm_prefix}</h1>
-    {in_df.to_html(index=False)}
-    <h1>Outgoing Mappings from {norm_prefix}</h1>
-    {out_df.to_html(index=False)}
-    """
-
-
-@search_blueprint.route("/canonicalize/<curie>")
-def canonicalize(curie: str):
-    """Return the best CURIE."""
-    # TODO maybe normalize the curie first?
-    norm_prefix, norm_identifier = normalize_curie(curie)
-    if norm_prefix is None or norm_identifier is None:
-        return jsonify(
-            query=curie,
-            normalizable=False,
-        )
-
-    norm_curie = f"{norm_prefix}:{norm_identifier}"
-
-    rv: Dict[str, Any] = dict(query=curie)
-    if norm_curie != curie:
-        rv["norm_curie"] = norm_curie
-
-    if norm_curie not in canonicalizer.graph:
-        rv["found"] = False
-    else:
-        result_curie = canonicalizer.canonicalize(norm_curie)
-        rv.update(
-            found=True,
-            result=result_curie,
-            mappings=url_for(
-                f".{all_mappings.__name__}",
-                source_curie=norm_curie,
-                target_curie=result_curie,
-            ),
-        )
-
-    return jsonify(rv)
-
-
-def get_app(paths: Union[None, str, Iterable[str]] = None) -> Flask:
-    """Build the Flask app."""
-    app = Flask(__name__)
-    Swagger(app)
-
-    logger.info("using bootstrap_flask %s", VERSION_BOOTSTRAP)
-    Bootstrap(app)
-
-    if paths is None:
-        df = ensure_inspector_javert_df()
-    elif isinstance(paths, str):
-        df = pd.read_csv(paths, sep="\t", dtype=str)
-    else:
-        df = pd.concat(pd.read_csv(path, sep="\t", dtype=str) for path in paths)
-
-    app.config["summary"] = summarize_xref_df(df)
-    app.config["summary_provenances"] = summarize_xref_provenances_df(df)
-    # TODO allow for specification of priorities in the canonicalizer
-    app.config["canonicalizer"] = Canonicalizer.from_df(df)
-    app.register_blueprint(search_blueprint)
-    return app
-
-
-def summarize_xref_df(df: pd.DataFrame) -> pd.DataFrame:
-    """Get all meta-mappings."""
-    return _summarize(df, [SOURCE_PREFIX, TARGET_PREFIX])
-
-
-def summarize_xref_provenances_df(df: pd.DataFrame) -> pd.DataFrame:
-    """Get all meta-mappings."""
-    return _summarize(df, [SOURCE_PREFIX, TARGET_PREFIX, PROVENANCE])
-
-
-def _summarize(df: pd.DataFrame, columns) -> pd.DataFrame:
-    """Get all meta-mappings."""
-    rv = df[columns].groupby(columns).size().reset_index()
-    rv.columns = [*columns, "count"]
-    rv.sort_values("count", inplace=True, ascending=False)
-    return rv
diff --git a/src/pyobo/apps/mapper/templates/base.html b/src/pyobo/apps/mapper/templates/base.html
deleted file mode 100644
index 72273005..00000000
--- a/src/pyobo/apps/mapper/templates/base.html
+++ /dev/null
@@ -1,35 +0,0 @@
-<!doctype html>
-<html lang="en">
-<head>
-    {% block head %}
-        <!-- Required meta tags -->
-        <meta charset="utf-8">
-        <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
-
-        {% block styles %}
-            <!-- Bootstrap CSS -->
-            {{ bootstrap.load_css() }}
-        {% endblock %}
-
-        <script src="https://kit.fontawesome.com/4c86883252.js" crossorigin="anonymous"></script>
-
-        <title>{% block title %}{% endblock %}</title>
-    {% endblock %}
-</head>
-<body>
-<div class="container" style="margin-top: 25px; margin-bottom: 25px">
-    <div class="row justify-content-center">
-        <div class="col-md-8 col-lg-8">
-            <div class="card">
-                <!-- Your page content -->
-                {% block content %}{% endblock %}
-            </div>
-        </div>
-    </div>
-</div>
-{% block scripts %}
-    <!-- Optional JavaScript -->
-    {{ bootstrap.load_js() }}
-{% endblock %}
-</body>
-</html>
diff --git a/src/pyobo/apps/mapper/templates/mapper_home.html b/src/pyobo/apps/mapper/templates/mapper_home.html
deleted file mode 100644
index e5c931f7..00000000
--- a/src/pyobo/apps/mapper/templates/mapper_home.html
+++ /dev/null
@@ -1,64 +0,0 @@
-{% extends "base.html" %}
-
-{% block content %}
-    <h5 class="card-header">Inspector Javert's Mapper</h5>
-    <div class="card-body">
-        <p class="card-text">
-            This service resolves <a href="https://en.wikipedia.org/wiki/CURIE">CURIEs</a>
-            to the best CURIE that's mapped to it.
-        </p>
-        <p>
-            A summary of all of the xrefs can be found <a href="{{ url_for('.summarize') }}">here</a>.
-            You can also look at a summary for a specific prefix like <a
-                href="{{ url_for('.summarize_one', prefix='umls') }}"><code>UMLS</code></a>.
-        </p>
-        <ul>
-            <li>
-                Use the <code>/mappings</code> endpoint to look up equivalent entities,
-                for example, <a
-                    href="{{ url_for('.single_source_mappings', curie='hgnc:6893') }}"><code>hgnc:6893</code></a>.
-            </li>
-            <li>
-                Use the <code>/mappings</code> endpoint to look up all mappings between two entities,
-                for example, <a
-                    href="{{ url_for('.all_mappings', source_curie='hgnc:6893', target_curie='ensembl:ENSG00000186868') }}"><code>hgnc:6893</code>
-                and <code>ensembl:ENSG00000186868</code></a>.
-            </li>
-        </ul>
-    </div>
-    <table class="table">
-        <thead>
-        <tr>
-            <th scope="col">CURIE</th>
-            <th scope="col">Description</th>
-        </tr>
-        </thead>
-        <tr>
-            <td><a href="{{ url_for('.canonicalize', curie='hgnc:6893') }}">hgnc:6893</a></td>
-            <td>✅ maps correct identifier to higher priority namespace (ncbigene)</td>
-        </tr>
-        <tr>
-            <td><a href="{{ url_for('.canonicalize', curie='ncbigene:4137') }}">ncbigene:4137</a></td>
-            <td>✅ already priority namespace</td>
-        </tr>
-        <tr>
-            <td><a href="{{ url_for('.canonicalize', curie='DOID:00000') }}">DOID:00000</a></td>
-            <td>❌ invalid identifier</td>
-        </tr>
-        <tr>
-            <td><a href="{{ url_for('.canonicalize', curie='NNN:00000') }}">NNN:00000</a></td>
-            <td>❌ invalid prefix</td>
-        </tr>
-        <tr>
-            <td><a href="{{ url_for('.canonicalize', curie='wikidata:Q42') }}">wikidata:Q42</a></td>
-            <td>❌ unmapped prefix</td>
-        </tr>
-    </table>
-    <div class="card-footer text-center">
-        Developed with ❤️ in 🇩🇪 by <a href="https://github.com/cthoyt">@cthoyt</a>.
-
-        (<span class="text-muted"><a href="https://github.com/pyobo/pyobo">source code</a></span> |
-        <span class="text-muted"><a
-                href="https://cthoyt.com/2020/04/19/inspector-javerts-xref-database.html">blog post</a></span>)
-    </div>
-{% endblock %}
diff --git a/src/pyobo/cli/cli.py b/src/pyobo/cli/cli.py
index 6b518367..0f9b4455 100644
--- a/src/pyobo/cli/cli.py
+++ b/src/pyobo/cli/cli.py
@@ -15,7 +15,6 @@
 from .aws import main as aws_main
 from .database import main as database_main
 from .lookup import lookup
-from ..apps.cli import main as apps_main
 from ..constants import RAW_DIRECTORY
 from ..plugins import has_nomenclature_plugin, iter_nomenclature_plugins
 from ..registries import iter_cached_obo
@@ -116,7 +115,6 @@ def remapping(file):
 
 
 main.add_command(lookup)
-main.add_command(apps_main)
 main.add_command(aws_main)
 main.add_command(database_main)
 
diff --git a/src/pyobo/sources/README.md b/src/pyobo/sources/README.md
new file mode 100644
index 00000000..d01c5544
--- /dev/null
+++ b/src/pyobo/sources/README.md
@@ -0,0 +1,15 @@
+# Sources
+
+1. Create a new module in `pyobo.sources` named with the prefix for the resource you're ontologizing
+2. Make sure your resource has a corresponding prefix in [the Bioregistry](https://github.com/biopragmatics/bioregistry)
+3. Subclass the `pyobo.Obo` class to represent your resource
+4. Add your resource to the list in `pyobo.sources.__init__`
+
+## What is in scope?
+
+1. Biomedical, semantic web, bibliographic, life sciences, and related natural sciences resources are welcome.
+2. The source you want to ontologize should be an identifier resource, i.e., it mints its own identifiers. If you want
+   to ontologize some database that reuses some other identifier resource's identifiers, then this isn't the right
+   place.
+3. Resources that are not possible to download automatically are not in scope for PyOBO. Reproducibility and reusability
+   are core values of this software.
diff --git a/src/pyobo/sources/agrovoc.py b/src/pyobo/sources/agrovoc.py
index 49d72ba0..b5d93f7f 100644
--- a/src/pyobo/sources/agrovoc.py
+++ b/src/pyobo/sources/agrovoc.py
@@ -11,6 +11,8 @@
     "ensure_agrovoc_graph",
 ]
 
+PREFIX = "agrovoc"
+
 
 def ensure_agrovoc_graph(version: str) -> Graph:
     """Download and parse the given version of AGROVOC."""
@@ -20,5 +22,5 @@ def ensure_agrovoc_graph(version: str) -> Graph:
     graph.bind("skosxl", "http://www.w3.org/2008/05/skos-xl#")
     graph.bind("skos", SKOS)
     graph.bind("dcterms", DCTERMS)
-    graph.bind("agrovoc", "http://aims.fao.org/aos/agrontology#")
+    graph.bind(PREFIX, "http://aims.fao.org/aos/agrontology#")
     return graph
diff --git a/src/pyobo/sources/uniprot/uniprot.py b/src/pyobo/sources/uniprot/uniprot.py
index 6b1a639d..e1a5a808 100644
--- a/src/pyobo/sources/uniprot/uniprot.py
+++ b/src/pyobo/sources/uniprot/uniprot.py
@@ -57,6 +57,7 @@ class UniProtGetter(Obo):
         gene_product_of,
         molecularly_interacts_with,
         derives_from,
+        located_in,
     ]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
diff --git a/src/pyobo/struct/reference.py b/src/pyobo/struct/reference.py
index a357f85a..ab1b3cbc 100644
--- a/src/pyobo/struct/reference.py
+++ b/src/pyobo/struct/reference.py
@@ -7,7 +7,7 @@
 import bioregistry
 import curies
 from curies.api import ExpansionError
-from pydantic import Field, root_validator, validator
+from pydantic import Field, field_validator, model_validator
 
 from .utils import obo_escape
 from ..identifier_utils import normalize_curie
@@ -23,7 +23,7 @@ class Reference(curies.Reference):
 
     name: Optional[str] = Field(default=None, description="the name of the reference")
 
-    @validator("prefix")
+    @field_validator("prefix")
     def validate_prefix(cls, v):  # noqa
         """Validate the prefix for this reference."""
         norm_prefix = bioregistry.normalize_prefix(v)
@@ -41,7 +41,7 @@ def preferred_curie(self) -> str:
         """Get the preferred curie for this reference."""
         return f"{self.preferred_prefix}:{self.identifier}"
 
-    @root_validator(pre=True)
+    @model_validator(mode="before")
     def validate_identifier(cls, values):  # noqa
         """Validate the identifier."""
         prefix, identifier = values.get("prefix"), values.get("identifier")
diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py
index 9d17d205..72b401e1 100644
--- a/src/pyobo/struct/struct.py
+++ b/src/pyobo/struct/struct.py
@@ -661,13 +661,14 @@ def _iter_terms(self, use_tqdm: bool = False, desc: str = "terms") -> Iterable[T
     def iterate_obo_lines(self) -> Iterable[str]:
         """Iterate over the lines to write in an OBO file."""
         yield f"format-version: {self.format_version}"
-        yield f"date: {self.date_formatted}"
 
         if self.auto_generated_by is not None:
             yield f"auto-generated-by: {self.auto_generated_by}"
 
         if self.data_version is not None:
             yield f"data-version: {self.data_version}"
+        else:
+            yield f"date: {self.date_formatted}"
 
         for prefix, url in sorted((self.idspaces or {}).items()):
             yield f"idspace: {prefix} {url}"
@@ -1466,7 +1467,7 @@ def _convert_typedefs(typedefs: Optional[Iterable[TypeDef]]) -> List[Mapping[str
 def _convert_typedef(typedef: TypeDef) -> Mapping[str, Any]:
     """Convert a type def."""
     # TODO add more later
-    return typedef.reference.dict()
+    return typedef.reference.model_dump()
 
 
 def _convert_synonym_typedefs(synonym_typedefs: Optional[Iterable[SynonymTypeDef]]) -> List[str]:
diff --git a/tests/test_get_miriam_url.py b/tests/test_get_miriam_url.py
deleted file mode 100644
index 57665d7f..00000000
--- a/tests/test_get_miriam_url.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""Tests for identifiers.org URL generation."""
-
-import logging
-import unittest
-
-import requests
-from bioregistry import get_identifiers_org_iri
-
-logger = logging.getLogger(__name__)
-
-#: These resources don't seem to exist anymore
-BLACKLIST = {
-    "abs",
-    "aftol.taxonomy",
-    "agricola",
-    "ecogene",
-    "euclinicaltrials",
-    "fsnp",
-    "gold",
-    "gold.genome",
-    "gold.meta",
-}
-
-#: These resources will need special rules for resolving
-UNSOLVED = {
-    "ark",
-    "did",
-    "gramene.growthstage",
-    "gwascentral.phenotype",
-    # TODO
-}
-
-
-class TestMiriam(unittest.TestCase):
-    """Test generating identifiers.org links."""
-
-    def test_successful(self):
-        """Test CURIEs that should work."""
-        curies = [
-            ("go", "0006915"),  # name in LUI
-            ("doid", "11337"),  # name in LUI
-            ("mesh", "C000100"),  # namespace not in LUI
-        ]
-
-        # curies = []
-        # for entry in get_miriam():
-        #     prefix = entry['prefix']
-        #     if prefix <= 'gramene.growthstage':  # TODO REMOVE THIS LINE
-        #         continue  # TODO REMOVE THIS LINE
-        #     norm_prefix = normalize_prefix(prefix)
-        #     self.assertIsNotNone(norm_prefix, msg=f'could not normalize MIRIAM prefix: {norm_prefix}')
-        #     curies.append((prefix, norm_prefix, entry['sampleId']))
-
-        for prefix, identifier in curies:
-            if prefix in BLACKLIST or prefix in UNSOLVED:
-                continue
-            with self.subTest(prefix=prefix, msg=f"failed for MIRIAM prefix: {prefix}"):
-                url = get_identifiers_org_iri(prefix, identifier)
-                self.assertIsNotNone(url, msg=f"metaregistry does not contain prefix {prefix}")
-                try:
-                    res = requests.get(url)
-                except (
-                    requests.exceptions.SSLError,
-                    requests.exceptions.ConnectionError,
-                ):
-                    logger.warning(f"identifiers.org has a problem resolving prefix {prefix}")
-                    continue
-                self.assertFalse(
-                    res.text.startswith("INVALID"),
-                    msg=f"invalid url for {prefix}: {url}\n\n{res.text}",
-                )
-
-    def test_unsuccessful(self):
-        """Test links that should fail."""
-        curies = [
-            ("nope_nope_nope", "0006915"),
-        ]
-        for prefix, identifier in curies:
-            with self.subTest(prefix=prefix):
-                url = get_identifiers_org_iri(prefix, identifier)
-                self.assertIsNone(url)
diff --git a/tests/test_sources/test_meta.py b/tests/test_sources/test_meta.py
new file mode 100644
index 00000000..23625954
--- /dev/null
+++ b/tests/test_sources/test_meta.py
@@ -0,0 +1,38 @@
+"""Test sources."""
+
+import importlib
+import unittest
+from pathlib import Path
+
+import pyobo.sources
+from pyobo import Obo
+
+EXCEPTIONS = {"biogrid", "agrovoc", "go", "chebi"}
+
+
+class TestSources(unittest.TestCase):
+    """Test sources."""
+
+    def test_complete(self):
+        """Test each non-skipped module in the sources directory exposes an `Obo` subclass."""
+        directory = Path(pyobo.sources.__file__).parent.resolve()
+        for path in directory.iterdir():
+            if (
+                path.stem in {"utils", "__init__", "__pycache__", "README"}
+                or path.stem.endswith("_utils")
+                or path.stem.endswith("_constants")
+                or path.stem in EXCEPTIONS
+            ):
+                continue
+            with self.subTest(module=path.stem):
+                module = importlib.import_module(f"pyobo.sources.{path.stem}")
+                getters = [
+                    y
+                    for k in module.__dir__()
+                    if isinstance(y := getattr(module, k), type)
+                    and issubclass(y, Obo)
+                    and y is not Obo
+                ]
+                self.assertNotEqual(
+                    0, len(getters), msg=f"forgot to create Obo subclass in {module.__name__}"
+                )
diff --git a/tox.ini b/tox.ini
index 82dbda3b..2b21a3d7 100644
--- a/tox.ini
+++ b/tox.ini
@@ -19,16 +19,12 @@ envlist =
     readme
     docs
     # the actual tests
-    py-pydantic1
-    py-pydantic2
+    py
     # always keep coverage-report last
     # coverage-report
 
 [testenv]
 commands = coverage run -p -m pytest --durations=20 {posargs:tests}
-deps =
-    pydantic1: pydantic<2.0
-    pydantic2: pydantic>=2.0
 extras =
     tests