diff --git a/README.md b/README.md index 84e8b632..d2bd4efe 100644 --- a/README.md +++ b/README.md @@ -291,4 +291,5 @@ To create a new release, - Check which services currently work (before the update). It's a sanity check for if a service _doesn't_ work later. - Update the code on the server by checking out the release - Merge configurations as necessary + - Make sure the latest database migrations are applied: see ["Schema Migrations"](alembic/README.md#update-the-database) 9. Notify everyone (e.g., in the API channel in Slack). diff --git a/alembic/Dockerfile b/alembic/Dockerfile new file mode 100644 index 00000000..99a96388 --- /dev/null +++ b/alembic/Dockerfile @@ -0,0 +1,5 @@ +FROM aiod_metadata_catalogue +RUN python -m pip install alembic +ENV PYTHONPATH="$PYTHONPATH:/app" +WORKDIR /alembic +ENTRYPOINT ["alembic", "upgrade", "head"] diff --git a/alembic/README.md b/alembic/README.md new file mode 100644 index 00000000..c0960285 --- /dev/null +++ b/alembic/README.md @@ -0,0 +1,32 @@ +# Database Schema Migrations + +We use [Alembic](https://alembic.sqlalchemy.org/en/latest/tutorial.html#running-our-first-migration) to automate database schema migrations +(e.g., adding a table, altering a column, and so on). +Please refer to the Alembic documentation for more information. + +## Usage +Commands below assume that the root directory of the project is your current working directory. + +Build the image with: +```commandline +docker build -f alembic/Dockerfile . -t aiod-migration +``` + +With the sqlserver container running, you can migrate to the latest schema with: + +```commandline +docker run -v $(pwd)/alembic:/alembic:ro -v $(pwd)/src:/app -it --network aiod_default aiod-migration +``` +Make sure that the specified `--network` is the docker network that has the `sqlserver` container. 
+The alembic directory is mounted to ensure the latest migrations are available, +the src directory is mounted so the migration scripts can use defined classes and variables from the project. + +## Update the Database +> [!Caution] +> Database migrations may be irreversible. Always make sure there is a backup of the old database. + +Following the usage commands above, on a new release we should run alembic to ensure the latest schema changes are applied. +The default entrypoint of the container specifies to upgrade the database to the latest schema. + +## TODO + - set up support for auto-generating migration scripts: https://alembic.sqlalchemy.org/en/latest/autogenerate.html diff --git a/alembic/alembic.ini b/alembic/alembic.ini new file mode 100644 index 00000000..f8e5a0c9 --- /dev/null +++ b/alembic/alembic.ini @@ -0,0 +1,116 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +# Use forward slashes (/) also on windows to provide an os agnostic path +script_location = alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. 
+# Any required deps can be installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = '' + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. 
See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/alembic/README b/alembic/alembic/README new file mode 100644 index 00000000..98e4f9c4 --- /dev/null +++ b/alembic/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/alembic/alembic/env.py b/alembic/alembic/env.py new file mode 100644 index 00000000..3320285a --- /dev/null +++ b/alembic/alembic/env.py @@ -0,0 +1,72 @@ +from logging.config import fileConfig + +from alembic import context + +# Assumes /src is in the Python path, so we can re-use logic for constructing db connections +from database.session import db_url +from database.session import EngineSingleton + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. 
+if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = None + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = db_url() + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = EngineSingleton().engine + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/alembic/script.py.mako b/alembic/alembic/script.py.mako new file mode 100644 index 00000000..fbc4b07d --- /dev/null +++ b/alembic/alembic/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
+revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/alembic/alembic/versions/0a23b40cc09c_extend_max_length_of_text_in_note.py b/alembic/alembic/versions/0a23b40cc09c_extend_max_length_of_text_in_note.py new file mode 100644 index 00000000..a1e4ef16 --- /dev/null +++ b/alembic/alembic/versions/0a23b40cc09c_extend_max_length_of_text_in_note.py @@ -0,0 +1,48 @@ +"""Extend max length of text in note + +Revision ID: 0a23b40cc09c +Revises: +Create Date: 2024-08-29 11:37:20.827291 + +""" +from typing import Sequence, Union + +from alembic import op +from sqlalchemy import String + +from database.model.field_length import VERY_LONG + +# revision identifiers, used by Alembic. 
+revision: str = "0a23b40cc09c" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # All models that derive from AIResourceBase + for table in [ + "news", + "team", + "person", + "organisation", + "event", + "project", + "service", + "dataset", + "case_study", + "publication", + "computational_asset", + "ml_model", + "experiment", + "educational_resource", + ]: + op.alter_column( + f"note_{table}", + "value", + type_=String(VERY_LONG), + ) + + +def downgrade() -> None: + pass diff --git a/src/database/model/ai_resource/note.py b/src/database/model/ai_resource/note.py index 276428a2..6ed1f514 100644 --- a/src/database/model/ai_resource/note.py +++ b/src/database/model/ai_resource/note.py @@ -3,7 +3,7 @@ from sqlalchemy import Column, Integer, ForeignKey from sqlmodel import Field, SQLModel -from database.model.field_length import LONG +from database.model.field_length import VERY_LONG class NoteBase(SQLModel): @@ -11,7 +11,7 @@ class NoteBase(SQLModel): index=False, unique=False, description="The string value", - max_length=LONG, + max_length=VERY_LONG, schema_extra={"example": "A brief record of points or ideas about this AI resource."}, ) diff --git a/src/database/model/field_length.py b/src/database/model/field_length.py index 4bc0115e..47ab7736 100644 --- a/src/database/model/field_length.py +++ b/src/database/model/field_length.py @@ -7,4 +7,5 @@ SHORT = 64 NORMAL = 256 LONG = 1800 # an A4s full of text +VERY_LONG = 8000 # Cut off for out-of-row storage MAX_TEXT = 65535 # max length for Mysql text