Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more Article columns #19

Merged
merged 7 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""add more article fields

Revision ID: 1feba14fd658
Revises: 70e5e82e94c0 # noqa: W291
Create Date: 2023-12-04 20:10:13.657413

"""
import sqlalchemy as sa
import sqlmodel.sql.sqltypes
from sqlalchemy.dialects import postgresql

from alembic import op

# Revision identifiers, used by Alembic.
# `revision` is this migration's ID and `down_revision` is the migration it revises
# (they mirror the "Revision ID" / "Revises" lines in the module docstring).
revision = "1feba14fd658"
down_revision = "70e5e82e94c0"
# No branch labels or dependencies on other revisions are declared for this migration.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the new article columns and rename ``published_at`` to ``site_published_at``.

    Resulting changes to the ``article`` table:

    * ``description``: new nullable string column.
    * ``content``: new NOT NULL text column.
    * ``site_published_at``: NOT NULL timestamp, carried over from ``published_at``.
    * ``site_updated_at``: new nullable timestamp column.

    The final schema is the same as the auto-generated version of this migration, but the
    steps are written so they also succeed on a table that already contains rows and so the
    existing publication timestamps are kept.
    """
    op.add_column("article", sa.Column("description", sqlmodel.sql.sqltypes.AutoString(), nullable=True))

    # A NOT NULL column cannot be added to a populated table without a default, so existing
    # rows are backfilled with an empty string via a temporary server default...
    op.add_column("article", sa.Column("content", sa.Text(), nullable=False, server_default=sa.text("''")))
    # ...which is then removed so the column ends up with no default at all.
    op.alter_column("article", "content", existing_type=sa.Text(), existing_nullable=False, server_default=None)

    # Rename instead of drop + add: dropping "published_at" would discard the stored values,
    # and adding a NOT NULL "site_published_at" without a default would fail on existing rows.
    op.alter_column(
        "article",
        "published_at",
        new_column_name="site_published_at",
        existing_type=sa.DateTime(),
        existing_nullable=False,
    )

    op.add_column("article", sa.Column("site_updated_at", sa.DateTime(), nullable=True))


def downgrade() -> None:
    """Revert the ``article`` table to its shape before this revision.

    Drops ``site_updated_at``, ``content`` and ``description`` and restores the
    ``published_at`` column name. The data held in the dropped columns is lost.
    """
    op.drop_column("article", "site_updated_at")

    # Rename back instead of add + drop: re-adding a NOT NULL "published_at" without a default
    # would fail on a populated table, and dropping "site_published_at" would lose the values.
    op.alter_column(
        "article",
        "site_published_at",
        new_column_name="published_at",
        existing_type=postgresql.TIMESTAMP(),
        existing_nullable=False,
    )

    op.drop_column("article", "content")
    op.drop_column("article", "description")
25 changes: 21 additions & 4 deletions article_rec_db/models/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Annotated
from uuid import UUID

from sqlmodel import Field, Relationship, String, UniqueConstraint
from sqlmodel import Field, Relationship, String, Text, UniqueConstraint

from article_rec_db.sites import SiteName

Expand All @@ -23,14 +23,31 @@ class Language(StrEnum):

class Article(UpdateTracked, table=True):
__table_args__ = (UniqueConstraint("site", "id_in_site", name="article_site_idinsite_unique"),)
__mapper_args__ = {"polymorphic_identity": "article"}

# Page ID refers to the page table
page_id: Annotated[UUID, Field(primary_key=True, foreign_key="page.id")]

# Site name
site: Annotated[SiteName, Field(sa_type=String)]
id_in_site: str # ID of article in the partner site's internal system
# ID of article in the partner site's internal system
id_in_site: str

# Title/headline
title: str
published_at: datetime
# Description/summary
description: str | None
# Full text of article; might also include (sanitized) HTML tags
content: Annotated[str, Field(sa_type=Text)]

# When the article was published on the partner site
site_published_at: datetime
# When the article was last updated on the partner site
site_updated_at: datetime | None

# Language of the article
language: Language = Language.ENGLISH

# Whether the article is in-house content or not (e.g., republished from another source)
is_in_house_content: bool = True

# An article is always a page, but a page is not always an article
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool.poetry]
description = "Database models and migrations for the Local News Lab's article recommendation system"
name = "article-rec-db"
version = "0.0.7"
version = "0.0.8"
authors = ["Duy Nguyen <hello.duyknguyen@gmail.com>"]
license = "MIT"
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/models/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from enum import StrEnum

import pytest
from sqlalchemy.future.engine import Engine, create_engine
from sqlalchemy.engine import Engine, create_engine
from sqlmodel import text

import alembic.command
Expand Down
31 changes: 23 additions & 8 deletions tests/integration/models/test_article.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ def test_add_article_with_page(refresh_tables, engine):
site=DALLAS_FREE_PRESS.name,
id_in_site="1234",
title="Example Article",
published_at=article_published_at,
description="Description",
content="<p>Content</p>",
site_published_at=article_published_at,
language=Language.SPANISH,
page=page,
)
Expand All @@ -44,7 +46,10 @@ def test_add_article_with_page(refresh_tables, engine):
assert article.site == DALLAS_FREE_PRESS.name
assert article.id_in_site == "1234"
assert article.title == "Example Article"
assert article.published_at == article_published_at
assert article.description == "Description"
assert article.content == "<p>Content</p>"
assert article.site_published_at == article_published_at
assert article.site_updated_at is None
assert article.language == Language.SPANISH
assert article.is_in_house_content is True
assert article.page is page
Expand All @@ -59,7 +64,8 @@ def test_add_article_without_page(refresh_tables, engine):
site=DALLAS_FREE_PRESS.name,
id_in_site="2345",
title="Example Article",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
)

with Session(engine) as session:
Expand Down Expand Up @@ -90,14 +96,16 @@ def test_add_articles_duplicate_site_and_id_in_site(refresh_tables, engine):
site=DALLAS_FREE_PRESS.name,
id_in_site=id_in_site,
title="Example Article",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page1,
)
article2 = Article(
site=DALLAS_FREE_PRESS.name,
id_in_site=id_in_site,
title="Example Article 2",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page2,
)

Expand Down Expand Up @@ -127,7 +135,8 @@ def test_update_article(refresh_tables, engine):
site=DALLAS_FREE_PRESS.name,
id_in_site="1234",
title="Example Article",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page,
)

Expand All @@ -139,10 +148,14 @@ def test_update_article(refresh_tables, engine):
assert article.db_updated_at is None

article.title = "Example Article with Title Updated"
site_updated_at = datetime.utcnow()
article.site_updated_at = site_updated_at
session.commit()

# Upon update, db_updated_at should be set
assert isinstance(article.db_updated_at, datetime)
assert article.title == "Example Article with Title Updated"
assert article.site_updated_at == site_updated_at


def test_delete_article(refresh_tables, engine):
Expand All @@ -160,14 +173,16 @@ def test_delete_article(refresh_tables, engine):
site=DALLAS_FREE_PRESS.name,
id_in_site="1234",
title="Example Article 1",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page1,
)
article2 = Article(
site=DALLAS_FREE_PRESS.name,
id_in_site="2345",
title="Example Article 2",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page2,
)

Expand Down
18 changes: 12 additions & 6 deletions tests/integration/models/test_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def test_add_embedding(refresh_tables, engine, rng):
site=DALLAS_FREE_PRESS.name,
id_in_site="1234",
title="Example Article",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page,
)
execution = Execution(
Expand Down Expand Up @@ -70,21 +71,24 @@ def test_select_embeddings_knn(refresh_tables, engine, rng):
site=DALLAS_FREE_PRESS.name,
id_in_site="1234",
title="Example Article",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page1,
)
article2 = Article(
site=DALLAS_FREE_PRESS.name,
id_in_site="2345",
title="Example Article 2",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page2,
)
article3 = Article(
site=DALLAS_FREE_PRESS.name,
id_in_site="3456",
title="Example Article 3",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page3,
)
execution = Execution(
Expand Down Expand Up @@ -152,14 +156,16 @@ def test_delete_embedding(refresh_tables, engine):
site=DALLAS_FREE_PRESS.name,
id_in_site="1234",
title="Example Article 1",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page1,
)
article2 = Article(
site=DALLAS_FREE_PRESS.name,
id_in_site="2345",
title="Example Article 2",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page2,
)

Expand Down
6 changes: 4 additions & 2 deletions tests/integration/models/test_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,16 @@ def test_delete_execution(refresh_tables, engine):
site=DALLAS_FREE_PRESS.name,
id_in_site="1234",
title="Example Article 1",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page1,
)
article2 = Article(
site=DALLAS_FREE_PRESS.name,
id_in_site="2345",
title="Example Article 2",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page2,
)

Expand Down
6 changes: 4 additions & 2 deletions tests/integration/models/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,16 @@ def test_delete_page(refresh_tables, engine):
site=DALLAS_FREE_PRESS.name,
id_in_site="1234",
title="Example Article 1",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page1,
)
article2 = Article(
site=DALLAS_FREE_PRESS.name,
id_in_site="2345",
title="Example Article 2",
published_at=datetime.utcnow(),
content="<p>Content</p>",
site_published_at=datetime.utcnow(),
page=page2,
)

Expand Down
Loading