From 7e337d71d9dfc3358fb5cdb8df6ad82fab88d7e4 Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Sat, 1 Feb 2025 18:03:15 +0000 Subject: [PATCH 1/8] Created test to replicate issue --- back/tests/conftest.py | 10 +++++++++ back/tests/test_api/test_datasets.py | 32 ++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/back/tests/conftest.py b/back/tests/conftest.py index 3e2a5174..556e9b44 100644 --- a/back/tests/conftest.py +++ b/back/tests/conftest.py @@ -26,6 +26,16 @@ logging.getLogger("passlib").setLevel(logging.WARNING) +@pytest.fixture +def repo_root() -> Path: + return Path(__file__).parent.parent.parent + + +@pytest.fixture +def example_data_dir(repo_root: Path) -> Path: + return repo_root / "example_data" + + @pytest.fixture def data_dir() -> Path: """Return the path to the data directory.""" diff --git a/back/tests/test_api/test_datasets.py b/back/tests/test_api/test_datasets.py index 1644bef8..b68238fe 100644 --- a/back/tests/test_api/test_datasets.py +++ b/back/tests/test_api/test_datasets.py @@ -5,10 +5,12 @@ from pathlib import Path import pytest +from soundevent.io.aoef import to_aeof from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from whombat import api, exceptions, models, schemas +from whombat.api.io import aoef async def test_created_dataset_is_stored_in_the_database( @@ -761,3 +763,33 @@ async def test_create_dataset_registers_all_recordings( all_recordings, _ = await api.recordings.get_many(session) assert len(all_recordings) == 2 + + +async def test_exported_datasets_paths_are_not_absolute( + session: AsyncSession, + example_data_dir: Path, +): + example_dataset = example_data_dir / "example_dataset.json" + assert example_dataset.is_file() + + audio_dir = example_data_dir / "audio" + assert audio_dir.is_dir() + + db_dataset = await aoef.import_dataset( + session, + example_dataset, + dataset_dir=audio_dir, + audio_dir=audio_dir, + ) + + await session.commit() + await session.refresh(db_dataset) + + whombat_dataset = await api.datasets.get(session, db_dataset.uuid) + dataset = await api.datasets.to_soundevent(session, whombat_dataset) + + exported = to_aeof(dataset) + + for recording in exported.data.recordings or []: + assert not recording.path.is_absolute() + assert recording.path.is_relative_to(audio_dir) From 5f45a24cdcb0c5818f4150025bd32e8555f3bdef Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Sat, 1 Feb 2025 18:17:34 +0000 Subject: [PATCH 2/8] Created export/import methods for datasets Python API --- back/src/whombat/api/datasets.py | 29 +++++++++++++++++++++++++++- back/src/whombat/routes/datasets.py | 15 +++++--------- back/tests/test_api/test_datasets.py | 13 +++---------- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/back/src/whombat/api/datasets.py b/back/src/whombat/api/datasets.py index be452979..7b898749 100644 --- a/back/src/whombat/api/datasets.py +++ b/back/src/whombat/api/datasets.py @@ -4,16 +4,18 @@ import uuid import warnings from pathlib import Path -from typing import Sequence +from typing import BinaryIO, Sequence import pandas as pd from soundevent import data +from soundevent.io.aoef import AOEFObject, to_aeof from sqlalchemy import select, tuple_ from sqlalchemy.ext.asyncio import AsyncSession from whombat import exceptions, models, schemas from whombat.api import common from whombat.api.common import BaseAPI +from whombat.api.io import aoef from whombat.api.recordings import recordings from whombat.core import files from whombat.filters.base import Filter @@ -729,5 +731,30 @@ async def to_dataframe( ] ) + async def import_dataset( + self, + session: AsyncSession, + dataset: Path | BinaryIO | str, + dataset_audio_dir: Path, + audio_dir: Path | None = None, + ) -> schemas.Dataset: + db_dataset = await aoef.import_dataset( + session, + dataset, + dataset_dir=dataset_audio_dir, + audio_dir=audio_dir or Path.cwd(), + ) + await session.commit() + await session.refresh(db_dataset) + return schemas.Dataset.model_validate(db_dataset) + + async def export_dataset( + self, + session: AsyncSession, + dataset: schemas.Dataset, + ) -> AOEFObject: + soundevent_dataset = await self.to_soundevent(session, dataset) + return to_aeof(soundevent_dataset) + datasets = DatasetAPI() diff --git a/back/src/whombat/routes/datasets.py b/back/src/whombat/routes/datasets.py index c1907f23..92f59d4c 100644 --- a/back/src/whombat/routes/datasets.py +++ b/back/src/whombat/routes/datasets.py @@ -9,10 +9,9 @@ from fastapi import APIRouter, Body, Depends, UploadFile from fastapi.responses import Response, StreamingResponse from pydantic import DirectoryPath -from soundevent.io.aoef import DatasetObject, to_aeof +from soundevent.io.aoef import DatasetObject from whombat import api, schemas -from whombat.api.io import aoef from whombat.filters.datasets import DatasetFilter from whombat.routes.dependencies import Session, WhombatSettings from whombat.routes.types import Limit, Offset @@ -140,9 +139,8 @@ async def download_dataset_json( ): """Export a dataset.""" whombat_dataset = await api.datasets.get(session, dataset_uuid) - dataset = await api.datasets.to_soundevent(session, whombat_dataset) - obj = to_aeof(dataset) - filename = f"{dataset.name}_{obj.created_on.isoformat()}.json" + obj = await api.datasets.export_dataset(session, whombat_dataset) + filename = f"{whombat_dataset.name}_{obj.created_on.isoformat()}.json" return Response( obj.model_dump_json(), media_type="application/json", @@ -187,12 +185,9 @@ async def import_dataset( if not audio_dir.exists(): raise FileNotFoundError(f"Audio directory {audio_dir} does not exist.") - db_dataset = await aoef.import_dataset( + return await api.datasets.import_dataset( session, dataset.file, - dataset_dir=audio_dir, + dataset_audio_dir=audio_dir, audio_dir=settings.audio_dir, ) - await session.commit() - await session.refresh(db_dataset) - return schemas.Dataset.model_validate(db_dataset) diff --git a/back/tests/test_api/test_datasets.py b/back/tests/test_api/test_datasets.py index b68238fe..631ae48f 100644 --- a/back/tests/test_api/test_datasets.py +++ b/back/tests/test_api/test_datasets.py @@ -775,20 +775,13 @@ async def test_exported_datasets_paths_are_not_absolute( audio_dir = example_data_dir / "audio" assert audio_dir.is_dir() - db_dataset = await aoef.import_dataset( + whombat_dataset = await api.datasets.import_dataset( session, example_dataset, - dataset_dir=audio_dir, + dataset_audio_dir=audio_dir, audio_dir=audio_dir, ) - - await session.commit() - await session.refresh(db_dataset) - - whombat_dataset = await api.datasets.get(session, db_dataset.uuid) - dataset = await api.datasets.to_soundevent(session, whombat_dataset) - - exported = to_aeof(dataset) + exported = await api.datasets.export_dataset(session, whombat_dataset) for recording in exported.data.recordings or []: assert not recording.path.is_absolute() From 27b02d7308813955ce2233ec1ab8c7711d9fadd4 Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Sat, 1 Feb 2025 18:19:26 +0000 Subject: [PATCH 3/8] Fix path export --- back/src/whombat/api/datasets.py | 8 ++++++-- back/tests/test_api/test_datasets.py | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/back/src/whombat/api/datasets.py b/back/src/whombat/api/datasets.py index 7b898749..5cc48b64 100644 --- a/back/src/whombat/api/datasets.py +++ b/back/src/whombat/api/datasets.py @@ -753,8 +753,12 @@ async def export_dataset( session: AsyncSession, dataset: schemas.Dataset, ) -> AOEFObject: - soundevent_dataset = await self.to_soundevent(session, dataset) - return to_aeof(soundevent_dataset) + soundevent_dataset = await self.to_soundevent( + session, + dataset, + audio_dir=dataset.audio_dir, + ) + return to_aeof(soundevent_dataset, audio_dir=dataset.audio_dir) datasets = DatasetAPI() diff --git a/back/tests/test_api/test_datasets.py b/back/tests/test_api/test_datasets.py index 631ae48f..d676e0f0 100644 --- a/back/tests/test_api/test_datasets.py +++ b/back/tests/test_api/test_datasets.py @@ -784,5 +784,8 @@ async def test_exported_datasets_paths_are_not_absolute( exported = await api.datasets.export_dataset(session, whombat_dataset) for recording in exported.data.recordings or []: + # Check that paths are not absolute (full paths) assert not recording.path.is_absolute() - assert recording.path.is_relative_to(audio_dir) + + # Check that paths were exported relative to the dataset audio_dir + assert (audio_dir / recording.path).is_file() From a8d152f256dd8050d8f61f5e378e62c135fcb541 Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Sat, 1 Feb 2025 18:31:25 +0000 Subject: [PATCH 4/8] Update pyright --- back/uv.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/back/uv.lock b/back/uv.lock index e69c2ecf..4d869b26 100644 --- a/back/uv.lock +++ b/back/uv.lock @@ -340,7 +340,7 @@ name = "click" version = "8.1.7" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 } wheels = [ @@ -1192,7 +1192,7 @@ version = "1.6.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, { name = "ghp-import" }, { name = "jinja2" }, { name = "markdown" }, @@ -1692,15 +1692,15 @@ wheels = [ [[package]] name = "pyright" -version = "1.1.384" +version = "1.1.393" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nodeenv" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/84/00/a23114619f9d005f4b0f35e037c76cee029174d090a6f73a355749c74f4a/pyright-1.1.384.tar.gz", hash = "sha256:25e54d61f55cbb45f1195ff89c488832d7a45d59f3e132f178fdf9ef6cafc706", size = 21956 } +sdist = { url = "https://files.pythonhosted.org/packages/f4/c1/aede6c74e664ab103673e4f1b7fd3d058fef32276be5c43572f4067d4a8e/pyright-1.1.393.tar.gz", hash = "sha256:aeeb7ff4e0364775ef416a80111613f91a05c8e01e58ecfefc370ca0db7aed9c", size = 3790430 } wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/4a/e7f4d71d194ba675f3577d11eebe4e17a592c4d1c3f9986d4b321ba3c809/pyright-1.1.384-py3-none-any.whl", hash = "sha256:f0b6f4db2da38f27aeb7035c26192f034587875f751b847e9ad42ed0c704ac9e", size = 18578 }, + { url = "https://files.pythonhosted.org/packages/92/47/f0dd0f8afce13d92e406421ecac6df0990daee84335fc36717678577d3e0/pyright-1.1.393-py3-none-any.whl", hash = "sha256:8320629bb7a44ca90944ba599390162bf59307f3d9fb6e27da3b7011b8c17ae5", size = 5646057 }, ] [[package]] @@ -2482,7 +2482,7 @@ wheels = [ [[package]] name = "whombat" -version = "0.6.4" +version = "0.7.3" source = { editable = "." } dependencies = [ { name = "aiosqlite" }, From 5e915ae5c14e2e86c13c7670f408ffbefe129c69 Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Sat, 1 Feb 2025 18:31:30 +0000 Subject: [PATCH 5/8] Fix linting issues --- back/tests/test_api/test_datasets.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/back/tests/test_api/test_datasets.py b/back/tests/test_api/test_datasets.py index d676e0f0..315f09fe 100644 --- a/back/tests/test_api/test_datasets.py +++ b/back/tests/test_api/test_datasets.py @@ -5,12 +5,10 @@ from pathlib import Path import pytest -from soundevent.io.aoef import to_aeof from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from whombat import api, exceptions, models, schemas -from whombat.api.io import aoef async def test_created_dataset_is_stored_in_the_database( From 9a653e6adf9b0977d6544207df6c889ec3d5fe4c Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Sun, 2 Feb 2025 15:07:26 +0000 Subject: [PATCH 6/8] Merge lint workflow into test --- .github/workflows/lint.yml | 89 -------------------------------------- .github/workflows/test.yml | 70 ++++++++++++++---------------- 2 files changed, 33 insertions(+), 126 deletions(-) delete mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml deleted file mode 100644 index 98f07031..00000000 --- a/.github/workflows/lint.yml +++ /dev/null @@ -1,89 +0,0 @@ -name: Lint -on: - push: - branches: ["main"] - pull_request: - branches: ["main"] - -jobs: - lint-backend: - env: - UV_CACHE_DIR: /tmp/.uv-cache - - runs-on: "ubuntu-latest" - - steps: - - uses: actions/checkout@v4 - - - name: Set up uv - run: curl -LsSf https://astral.sh/uv/install.sh | sh - - - name: Set up Python 3.12 - run: uv python install 3.12 - - - name: Restore uv cache - uses: actions/cache@v4 - with: - path: /tmp/.uv-cache - key: uv-${{ runner.os }}-${{ hashFiles('back/uv.lock') }} - restore-keys: | - uv-${{ runner.os }}-${{ hashFiles('back/uv.lock') }} - uv-${{ runner.os }} - - - name: Install dev dependencies - run: | - cd back - uv sync --all-extras --dev - - - name: Run Ruff - run: | - cd back - uv run ruff check src tests - - - name: Check Formatting - run: | - cd back - uv run ruff format --check src tests - - - name: Run pyright - run: | - cd back - uv run pyright src tests - - - name: Minimize uv cache - run: uv cache prune --ci - - lint-frontend: - runs-on: "ubuntu-latest" - - steps: - - name: Checkout source code - uses: actions/checkout@v4 - - - name: Setup node - uses: actions/setup-node@v4 - with: - node-version: 22 - cache: "npm" - cache-dependency-path: | - front/package-lock.json - - - name: Install frontend dependencies - run: | - cd front - npm ci - - - name: Check formatting - run: | - cd front - npm run format-check - - - name: Lint with Nextjs - run: | - cd front - npm run lint - - - name: Lint with Typescript Compiler - run: | - cd front - npm run lint-tsc diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f1849812..0a8aa246 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,37 +24,12 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up uv - if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest' }} - run: curl -LsSf https://astral.sh/uv/install.sh | sh - - - name: Set up uv - if: ${{ matrix.os == 'windows-latest' }} - run: irm https://astral.sh/uv/install.ps1 | iex - shell: powershell - - - name: Set up Python ${{ matrix.python-version }} - run: uv python install ${{ matrix.python-version }} - - - name: Restore uv cache - uses: actions/cache@v4 - if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest' }} + - name: Install uv and set the python version + uses: astral-sh/setup-uv@v5 with: - path: /tmp/.uv-cache - key: uv-${{ runner.os }}-${{ hashFiles('back/uv.lock') }} - restore-keys: | - uv-${{ runner.os }}-${{ hashFiles('back/uv.lock') }} - uv-${{ runner.os }} - - - name: Restore uv cache - uses: actions/cache@v4 - if: ${{ matrix.os == 'windows-latest' }} - with: - path: /tmp/.uv-cache - key: uv-${{ runner.os }}-${{ hashFiles('back\uv.lock') }} - restore-keys: | - uv-${{ runner.os }}-${{ hashFiles('back\uv.lock') }} - uv-${{ runner.os }} + python-version: ${{ matrix.python-version }} + enable-cache: true + cache-dependency-glob: "back/uv.lock" - name: Install the project dependencies run: | @@ -70,11 +45,20 @@ jobs: if: ${{ matrix.os == 'ubuntu-latest' }} run: echo "$PWD/back/.venv/bin" >> $GITHUB_PATH + - name: Run Linter + run: | + cd back + uv run ruff check src tests + + - name: Check Formatting + run: | + cd back + uv run ruff format --check src tests + - name: Check types - uses: jakebailey/pyright-action@v2 - with: - working-directory: back/ - extra-args: src + run: | + cd back + uv run pyright src - name: Run tests run: | @@ -89,9 +73,6 @@ jobs: fail_ci_if_error: true CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - - name: Minimize uv cache - run: uv cache prune --ci - test-frontend: runs-on: "ubuntu-latest" @@ -112,6 +93,21 @@ jobs: cd front npm ci + - name: Check formatting + run: | + cd front + npm run format-check + + - name: Lint with Nextjs + run: | + cd front + npm run lint + + - name: Lint with Typescript Compiler + run: | + cd front + npm run lint-tsc + - name: Run tests run: | cd front From 09bf045f494f9067b84120e889cd5dde992043fb Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Sun, 2 Feb 2025 15:25:22 +0000 Subject: [PATCH 7/8] Remove redundant lint step --- .github/workflows/test.yml | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0a8aa246..76f938ea 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,21 +30,13 @@ jobs: python-version: ${{ matrix.python-version }} enable-cache: true cache-dependency-glob: "back/uv.lock" + pyproject-file: "back/pyproject.toml" - name: Install the project dependencies run: | cd back uv sync --all-extras --dev - - name: Lint - run: | - cd back - uv run ruff check src tests - - - name: "Create cache key for documentation" - if: ${{ matrix.os == 'ubuntu-latest' }} - run: echo "$PWD/back/.venv/bin" >> $GITHUB_PATH - - name: Run Linter run: | cd back @@ -65,6 +57,10 @@ jobs: cd back uv run pytest --cov=src/whombat --cov-report=xml -n auto + - name: "Create cache key for documentation" + if: ${{ matrix.os == 'ubuntu-latest' }} + run: echo "$PWD/back/.venv/bin" >> $GITHUB_PATH + - name: Upload coverage reports to Codecov if: ${{ matrix.os == 'ubuntu-latest' }} uses: codecov/codecov-action@v4 From b50408f5dbde01ae5d04598152daed59fc12a547 Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Sun, 2 Feb 2025 17:12:29 +0000 Subject: [PATCH 8/8] Rename feature variables to avoid clashing with import --- back/src/whombat/api/clips.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/back/src/whombat/api/clips.py b/back/src/whombat/api/clips.py index 8879a78d..316e8ec6 100644 --- a/back/src/whombat/api/clips.py +++ b/back/src/whombat/api/clips.py @@ -72,8 +72,8 @@ async def create( **kwargs, ) - features = await self._create_clip_features(session, [clip]) - clip = clip.model_copy(update=dict(features=features[0])) + feats = await self._create_clip_features(session, [clip]) + clip = clip.model_copy(update=dict(features=feats[0])) self._update_cache(clip) return clip @@ -111,8 +111,8 @@ async def create_many_without_duplicates( clip_features = await self._create_clip_features(session, clips) return [ - clip.model_copy(update=dict(features=features)) - for clip, features in zip(clips, clip_features, strict=False) + clip.model_copy(update=dict(features=feats)) + for clip, feats in zip(clips, clip_features, strict=False) ] async def add_feature( @@ -308,8 +308,8 @@ async def _create_clip_features( create_values = [ (clip.id, feature.name, feature.value) - for clip, features in zip(clips, clip_features, strict=False) - for feature in features + for clip, feats in zip(clips, clip_features, strict=False) + for feature in feats ] # Get feature names