From 5812013686606df6c8d25bd074b2cd84f5c6db25 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Fri, 6 Sep 2024 12:02:46 +0200 Subject: [PATCH] Add service for database schema migration. Extend length of note.value field (#353) the reason to use very_long is that it makes more sense than an arbitrary long length that supports the current use case. the very long is based on when data can be stored in row, so has meaning to the database and will not change (unless db settings are changed). --------- Co-authored-by: taniya-das --- README.md | 1 + alembic/Dockerfile | 5 + alembic/README.md | 32 +++++ alembic/alembic.ini | 116 ++++++++++++++++++ alembic/alembic/README | 1 + alembic/alembic/env.py | 72 +++++++++++ alembic/alembic/script.py.mako | 26 ++++ ...cc09c_extend_max_length_of_text_in_note.py | 48 ++++++++ src/database/model/ai_resource/note.py | 4 +- src/database/model/field_length.py | 1 + 10 files changed, 304 insertions(+), 2 deletions(-) create mode 100644 alembic/Dockerfile create mode 100644 alembic/README.md create mode 100644 alembic/alembic.ini create mode 100644 alembic/alembic/README create mode 100644 alembic/alembic/env.py create mode 100644 alembic/alembic/script.py.mako create mode 100644 alembic/alembic/versions/0a23b40cc09c_extend_max_length_of_text_in_note.py diff --git a/README.md b/README.md index 84e8b632..d2bd4efe 100644 --- a/README.md +++ b/README.md @@ -291,4 +291,5 @@ To create a new release, - Check which services currently work (before the update). It's a sanity check for if a service _doesn't_ work later. - Update the code on the server by checking out the release - Merge configurations as necessary + - Make sure the latest database migrations are applied: see ["Schema Migrations"](alembic/README.md#update-the-database) 9. Notify everyone (e.g., in the API channel in Slack). 
diff --git a/alembic/Dockerfile b/alembic/Dockerfile new file mode 100644 index 00000000..99a96388 --- /dev/null +++ b/alembic/Dockerfile @@ -0,0 +1,5 @@ +FROM aiod_metadata_catalogue +RUN python -m pip install alembic +ENV PYTHONPATH="$PYTHONPATH:/app" +WORKDIR /alembic +ENTRYPOINT ["alembic", "upgrade", "head"] diff --git a/alembic/README.md b/alembic/README.md new file mode 100644 index 00000000..c0960285 --- /dev/null +++ b/alembic/README.md @@ -0,0 +1,32 @@ +# Database Schema Migrations + +We use [Alembic](https://alembic.sqlalchemy.org/en/latest/tutorial.html#running-our-first-migration) to automate database schema migrations +(e.g., adding a table, altering a column, and so on). +Please refer to the Alembic documentation for more information. + +## Usage +Commands below assume that the root directory of the project is your current working directory. + +Build the image with: +```commandline +docker build -f alembic/Dockerfile . -t aiod-migration +``` + +With the sqlserver container running, you can migrate to the latest schema with: + +```commandline +docker run -v $(pwd)/alembic:/alembic:ro -v $(pwd)/src:/app -it --network aiod_default aiod-migration +``` +Make sure that the specified `--network` is the docker network that has the `sqlserver` container. +The alembic directory is mounted to ensure the latest migrations are available, +the src directory is mounted so the migration scripts can use defined classes and variables from the project. + +## Update the Database +> [!Caution] +> Database migrations may be irreversible. Always make sure there is a backup of the old database. + +Following the usage commands above, on a new release we should run alembic to ensure the latest schema changes are applied. +The default entrypoint of the container specifies to upgrade the database to the latest schema. 
+ +## TODO + - set up support for auto-generating migration scripts: https://alembic.sqlalchemy.org/en/latest/autogenerate.html diff --git a/alembic/alembic.ini b/alembic/alembic.ini new file mode 100644 index 00000000..f8e5a0c9 --- /dev/null +++ b/alembic/alembic.ini @@ -0,0 +1,116 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +# Use forward slashes (/) also on windows to provide an os agnostic path +script_location = alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. +# Any required deps can be installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. 
+# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = '' + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. 
See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/alembic/README b/alembic/alembic/README new file mode 100644 index 00000000..98e4f9c4 --- /dev/null +++ b/alembic/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/alembic/alembic/env.py b/alembic/alembic/env.py new file mode 100644 index 00000000..3320285a --- /dev/null +++ b/alembic/alembic/env.py @@ -0,0 +1,72 @@ +from logging.config import fileConfig + +from alembic import context + +# Assumes /src is in the Python path, so we can re-use logic for constructing db connections +from database.session import db_url +from database.session import EngineSingleton + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. 
+if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = None + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = db_url() + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = EngineSingleton().engine + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/alembic/script.py.mako b/alembic/alembic/script.py.mako new file mode 100644 index 00000000..fbc4b07d --- /dev/null +++ b/alembic/alembic/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
+revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/alembic/alembic/versions/0a23b40cc09c_extend_max_length_of_text_in_note.py b/alembic/alembic/versions/0a23b40cc09c_extend_max_length_of_text_in_note.py new file mode 100644 index 00000000..a1e4ef16 --- /dev/null +++ b/alembic/alembic/versions/0a23b40cc09c_extend_max_length_of_text_in_note.py @@ -0,0 +1,48 @@ +"""Extend max length of text in note + +Revision ID: 0a23b40cc09c +Revises: +Create Date: 2024-08-29 11:37:20.827291 + +""" +from typing import Sequence, Union + +from alembic import op +from sqlalchemy import String + +from database.model.field_length import VERY_LONG + +# revision identifiers, used by Alembic. 
+revision: str = "0a23b40cc09c" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # All models that derive from AIResourceBase + for table in [ + "news", + "team", + "person", + "organisation", + "event", + "project", + "service", + "dataset", + "case_study", + "publication", + "computational_asset", + "ml_model", + "experiment", + "educational_resource", + ]: + op.alter_column( + f"note_{table}", + "value", + type_=String(VERY_LONG), + ) + + +def downgrade() -> None: + pass diff --git a/src/database/model/ai_resource/note.py b/src/database/model/ai_resource/note.py index 276428a2..6ed1f514 100644 --- a/src/database/model/ai_resource/note.py +++ b/src/database/model/ai_resource/note.py @@ -3,7 +3,7 @@ from sqlalchemy import Column, Integer, ForeignKey from sqlmodel import Field, SQLModel -from database.model.field_length import LONG +from database.model.field_length import VERY_LONG class NoteBase(SQLModel): @@ -11,7 +11,7 @@ class NoteBase(SQLModel): index=False, unique=False, description="The string value", - max_length=LONG, + max_length=VERY_LONG, schema_extra={"example": "A brief record of points or ideas about this AI resource."}, ) diff --git a/src/database/model/field_length.py b/src/database/model/field_length.py index 4bc0115e..47ab7736 100644 --- a/src/database/model/field_length.py +++ b/src/database/model/field_length.py @@ -7,4 +7,5 @@ SHORT = 64 NORMAL = 256 LONG = 1800 # an A4s full of text +VERY_LONG = 8000 # Cut off for out-of-row storage MAX_TEXT = 65535 # max length for Mysql text