Skip to content

Commit

Permalink
Merge pull request #49 from mbsantiago/fix/GH-48-export-relative-paths
Browse files Browse the repository at this point in the history
Fix/gh 48 export relative paths
  • Loading branch information
mbsantiago authored Feb 2, 2025
2 parents dd70269 + b50408f commit 75dfd32
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 153 deletions.
89 changes: 0 additions & 89 deletions .github/workflows/lint.yml

This file was deleted.

74 changes: 33 additions & 41 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,63 +24,43 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Set up uv
if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest' }}
run: curl -LsSf https://astral.sh/uv/install.sh | sh

- name: Set up uv
if: ${{ matrix.os == 'windows-latest' }}
run: irm https://astral.sh/uv/install.ps1 | iex
shell: powershell

- name: Set up Python ${{ matrix.python-version }}
run: uv python install ${{ matrix.python-version }}

- name: Restore uv cache
uses: actions/cache@v4
if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest' }}
with:
path: /tmp/.uv-cache
key: uv-${{ runner.os }}-${{ hashFiles('back/uv.lock') }}
restore-keys: |
uv-${{ runner.os }}-${{ hashFiles('back/uv.lock') }}
uv-${{ runner.os }}
- name: Restore uv cache
uses: actions/cache@v4
if: ${{ matrix.os == 'windows-latest' }}
- name: Install uv and set the python version
uses: astral-sh/setup-uv@v5
with:
path: /tmp/.uv-cache
key: uv-${{ runner.os }}-${{ hashFiles('back\uv.lock') }}
restore-keys: |
uv-${{ runner.os }}-${{ hashFiles('back\uv.lock') }}
uv-${{ runner.os }}
python-version: ${{ matrix.python-version }}
enable-cache: true
cache-dependency-glob: "back/uv.lock"
pyproject-file: "back/pyproject.toml"

- name: Install the project dependencies
run: |
cd back
uv sync --all-extras --dev
- name: Lint
- name: Run Linter
run: |
cd back
uv run ruff check src tests
- name: "Create cache key for documentation"
if: ${{ matrix.os == 'ubuntu-latest' }}
run: echo "$PWD/back/.venv/bin" >> $GITHUB_PATH
- name: Check Formatting
run: |
cd back
uv run ruff format --check src tests
- name: Check types
uses: jakebailey/pyright-action@v2
with:
working-directory: back/
extra-args: src
run: |
cd back
uv run pyright src
- name: Run tests
run: |
cd back
uv run pytest --cov=src/whombat --cov-report=xml -n auto
- name: "Create cache key for documentation"
if: ${{ matrix.os == 'ubuntu-latest' }}
run: echo "$PWD/back/.venv/bin" >> $GITHUB_PATH

- name: Upload coverage reports to Codecov
if: ${{ matrix.os == 'ubuntu-latest' }}
uses: codecov/codecov-action@v4
Expand All @@ -89,9 +69,6 @@ jobs:
fail_ci_if_error: true
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

- name: Minimize uv cache
run: uv cache prune --ci

test-frontend:
runs-on: "ubuntu-latest"

Expand All @@ -112,6 +89,21 @@ jobs:
cd front
npm ci
- name: Check formatting
run: |
cd front
npm run format-check
- name: Lint with Nextjs
run: |
cd front
npm run lint
- name: Lint with Typescript Compiler
run: |
cd front
npm run lint-tsc
- name: Run tests
run: |
cd front
Expand Down
12 changes: 6 additions & 6 deletions back/src/whombat/api/clips.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ async def create(
**kwargs,
)

features = await self._create_clip_features(session, [clip])
clip = clip.model_copy(update=dict(features=features[0]))
feats = await self._create_clip_features(session, [clip])
clip = clip.model_copy(update=dict(features=feats[0]))
self._update_cache(clip)
return clip

Expand Down Expand Up @@ -111,8 +111,8 @@ async def create_many_without_duplicates(

clip_features = await self._create_clip_features(session, clips)
return [
clip.model_copy(update=dict(features=features))
for clip, features in zip(clips, clip_features, strict=False)
clip.model_copy(update=dict(features=feats))
for clip, feats in zip(clips, clip_features, strict=False)
]

async def add_feature(
Expand Down Expand Up @@ -308,8 +308,8 @@ async def _create_clip_features(

create_values = [
(clip.id, feature.name, feature.value)
for clip, features in zip(clips, clip_features, strict=False)
for feature in features
for clip, feats in zip(clips, clip_features, strict=False)
for feature in feats
]

# Get feature names
Expand Down
33 changes: 32 additions & 1 deletion back/src/whombat/api/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@
import uuid
import warnings
from pathlib import Path
from typing import Sequence
from typing import BinaryIO, Sequence

import pandas as pd
from soundevent import data
from soundevent.io.aoef import AOEFObject, to_aeof
from sqlalchemy import select, tuple_
from sqlalchemy.ext.asyncio import AsyncSession

from whombat import exceptions, models, schemas
from whombat.api import common
from whombat.api.common import BaseAPI
from whombat.api.io import aoef
from whombat.api.recordings import recordings
from whombat.core import files
from whombat.filters.base import Filter
Expand Down Expand Up @@ -729,5 +731,34 @@ async def to_dataframe(
]
)

async def import_dataset(
    self,
    session: AsyncSession,
    dataset: Path | BinaryIO | str,
    dataset_audio_dir: Path,
    audio_dir: Path | None = None,
) -> schemas.Dataset:
    """Import a dataset from an AOEF file into the database.

    Parameters
    ----------
    session
        Open database session used for the import.
    dataset
        Path, file-like object, or string holding the AOEF dataset.
    dataset_audio_dir
        Directory the dataset's recording paths are stored relative to.
    audio_dir
        Base audio directory; defaults to the current working directory.

    Returns
    -------
    schemas.Dataset
        The newly stored dataset.
    """
    base_dir = Path.cwd() if audio_dir is None else audio_dir
    imported = await aoef.import_dataset(
        session,
        dataset,
        dataset_dir=dataset_audio_dir,
        audio_dir=base_dir,
    )
    # Persist, then reload so DB-populated fields are present before validation.
    await session.commit()
    await session.refresh(imported)
    return schemas.Dataset.model_validate(imported)

async def export_dataset(
    self,
    session: AsyncSession,
    dataset: schemas.Dataset,
) -> AOEFObject:
    """Export a dataset as an AOEF object.

    Recording paths are made relative to the dataset's own audio
    directory so the exported file contains no absolute paths.
    """
    base_dir = dataset.audio_dir
    se_dataset = await self.to_soundevent(session, dataset, audio_dir=base_dir)
    return to_aeof(se_dataset, audio_dir=base_dir)


datasets = DatasetAPI()
15 changes: 5 additions & 10 deletions back/src/whombat/routes/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@
from fastapi import APIRouter, Body, Depends, UploadFile
from fastapi.responses import Response, StreamingResponse
from pydantic import DirectoryPath
from soundevent.io.aoef import DatasetObject, to_aeof
from soundevent.io.aoef import DatasetObject

from whombat import api, schemas
from whombat.api.io import aoef
from whombat.filters.datasets import DatasetFilter
from whombat.routes.dependencies import Session, WhombatSettings
from whombat.routes.types import Limit, Offset
Expand Down Expand Up @@ -140,9 +139,8 @@ async def download_dataset_json(
):
"""Export a dataset."""
whombat_dataset = await api.datasets.get(session, dataset_uuid)
dataset = await api.datasets.to_soundevent(session, whombat_dataset)
obj = to_aeof(dataset)
filename = f"{dataset.name}_{obj.created_on.isoformat()}.json"
obj = await api.datasets.export_dataset(session, whombat_dataset)
filename = f"{whombat_dataset.name}_{obj.created_on.isoformat()}.json"
return Response(
obj.model_dump_json(),
media_type="application/json",
Expand Down Expand Up @@ -187,12 +185,9 @@ async def import_dataset(
if not audio_dir.exists():
raise FileNotFoundError(f"Audio directory {audio_dir} does not exist.")

db_dataset = await aoef.import_dataset(
return await api.datasets.import_dataset(
session,
dataset.file,
dataset_dir=audio_dir,
dataset_audio_dir=audio_dir,
audio_dir=settings.audio_dir,
)
await session.commit()
await session.refresh(db_dataset)
return schemas.Dataset.model_validate(db_dataset)
10 changes: 10 additions & 0 deletions back/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@
logging.getLogger("passlib").setLevel(logging.WARNING)


@pytest.fixture
def repo_root() -> Path:
    """Path of the repository root (three levels above this file)."""
    return Path(__file__).parents[2]


@pytest.fixture
def example_data_dir(repo_root: Path) -> Path:
    """Directory holding the bundled example dataset and its audio files."""
    return repo_root.joinpath("example_data")


@pytest.fixture
def data_dir() -> Path:
"""Return the path to the data directory."""
Expand Down
26 changes: 26 additions & 0 deletions back/tests/test_api/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,3 +761,29 @@ async def test_create_dataset_registers_all_recordings(

all_recordings, _ = await api.recordings.get_many(session)
assert len(all_recordings) == 2


async def test_exported_datasets_paths_are_not_absolute(
    session: AsyncSession,
    example_data_dir: Path,
):
    """Round-trip the example dataset and verify exported paths are relative."""
    dataset_file = example_data_dir / "example_dataset.json"
    recordings_dir = example_data_dir / "audio"

    # Sanity-check the example fixtures exist before importing.
    assert dataset_file.is_file()
    assert recordings_dir.is_dir()

    imported = await api.datasets.import_dataset(
        session,
        dataset_file,
        dataset_audio_dir=recordings_dir,
        audio_dir=recordings_dir,
    )

    exported = await api.datasets.export_dataset(session, imported)

    for rec in exported.data.recordings or []:
        # Exported paths must not be absolute (full paths) ...
        assert not rec.path.is_absolute()
        # ... and must resolve relative to the dataset audio dir.
        assert (recordings_dir / rec.path).is_file()
Loading

0 comments on commit 75dfd32

Please sign in to comment.