From bd954fce90be5606e7d05e35186f5bf51ba59cd1 Mon Sep 17 00:00:00 2001
From: Jarkko Jaakola <jarkko.jaakola@aiven.io>
Date: Wed, 22 Jan 2025 09:14:59 +0200
Subject: [PATCH] feat: handle urlencoded forward slash in subject

---
 src/schema_registry/routers/config.py         |  3 ++
 src/schema_registry/routers/mode.py           |  2 +
 .../routers/raw_path_router.py                | 43 +++++++++++++++++++
 src/schema_registry/routers/subjects.py       |  2 +
 tests/integration/test_request_forwarding.py  | 10 ++++-
 tests/integration/test_schema.py              |  5 ++-
 tests/utils.py                                |  4 +-
 7 files changed, 63 insertions(+), 6 deletions(-)
 create mode 100644 src/schema_registry/routers/raw_path_router.py

diff --git a/src/schema_registry/routers/config.py b/src/schema_registry/routers/config.py
index 9d99c42ee..de151b8da 100644
--- a/src/schema_registry/routers/config.py
+++ b/src/schema_registry/routers/config.py
@@ -12,14 +12,17 @@
 from schema_registry.controller import KarapaceSchemaRegistryController
 from schema_registry.registry import KarapaceSchemaRegistry
 from schema_registry.routers.errors import no_primary_url_error, unauthorized
+from schema_registry.routers.raw_path_router import RawPathRoute
 from schema_registry.routers.requests import CompatibilityLevelResponse, CompatibilityRequest, CompatibilityResponse
 from schema_registry.user import get_current_user
 from typing import Annotated
 
+
 config_router = APIRouter(
     prefix="/config",
     tags=["config"],
     responses={404: {"description": "Not found"}},
+    route_class=RawPathRoute,
 )
 
 
diff --git a/src/schema_registry/routers/mode.py b/src/schema_registry/routers/mode.py
index 4df141d3d..0c3c6d1e7 100644
--- a/src/schema_registry/routers/mode.py
+++ b/src/schema_registry/routers/mode.py
@@ -10,6 +10,7 @@
 from schema_registry.container import SchemaRegistryContainer
 from schema_registry.controller import KarapaceSchemaRegistryController
 from schema_registry.routers.errors import unauthorized
+from schema_registry.routers.raw_path_router import RawPathRoute
 from schema_registry.routers.requests import ModeResponse
 from schema_registry.user import get_current_user
 from typing import Annotated
@@ -18,6 +19,7 @@
     prefix="/mode",
     tags=["mode"],
     responses={404: {"description": "Not found"}},
+    route_class=RawPathRoute,
 )
 
 
diff --git a/src/schema_registry/routers/raw_path_router.py b/src/schema_registry/routers/raw_path_router.py
new file mode 100644
index 000000000..1b64cb8d9
--- /dev/null
+++ b/src/schema_registry/routers/raw_path_router.py
@@ -0,0 +1,43 @@
+"""
+Copyright (c) 2025 Aiven Ltd
+See LICENSE for details
+"""
+
+"""
+The MIT License (MIT)
+
+Copyright (c) 2018 Sebastián Ramírez
+"""
+
+from fastapi import HTTPException  # noqa: E402
+from fastapi.routing import APIRoute  # noqa: E402
+from starlette.routing import Match  # noqa: E402
+from starlette.types import Scope  # noqa: E402
+
+import re  # noqa: E402
+
+
+class RawPathRoute(APIRoute):
+    """The subject in the paths can contain url encoded forward slash
+
+    See explanation and origin of the solution:
+     - https://github.com/fastapi/fastapi/discussions/7328#discussioncomment-8443865
+    Starlette defect discussion:
+     - https://github.com/encode/starlette/issues/826
+    """
+
+    def matches(self, scope: Scope) -> tuple[Match, Scope]:
+        raw_path: str | None = None
+
+        if "raw_path" in scope and scope["raw_path"] is not None:
+            raw_path = scope["raw_path"].decode("utf-8")
+
+        if raw_path is None:
+            raise HTTPException(status_code=500, detail="Internal routing error")
+
+        # Drop the last forward slash if present. e.g. /path/ -> /path
+        raw_path = raw_path if raw_path[-1] != "/" else raw_path[:-1]
+
+        new_path = re.sub(r"\?.*", "", raw_path)
+        scope["path"] = new_path
+        return super().matches(scope)
diff --git a/src/schema_registry/routers/subjects.py b/src/schema_registry/routers/subjects.py
index 80cf79fa2..a3fae0720 100644
--- a/src/schema_registry/routers/subjects.py
+++ b/src/schema_registry/routers/subjects.py
@@ -14,6 +14,7 @@
 from schema_registry.controller import KarapaceSchemaRegistryController
 from schema_registry.registry import KarapaceSchemaRegistry
 from schema_registry.routers.errors import no_primary_url_error, unauthorized
+from schema_registry.routers.raw_path_router import RawPathRoute
 from schema_registry.routers.requests import SchemaIdResponse, SchemaRequest, SchemaResponse, SubjectSchemaVersionResponse
 from schema_registry.user import get_current_user
 from typing import Annotated
@@ -27,6 +28,7 @@
     prefix="/subjects",
     tags=["subjects"],
     responses={404: {"description": "Not found"}},
+    route_class=RawPathRoute,
 )
 
 
diff --git a/tests/integration/test_request_forwarding.py b/tests/integration/test_request_forwarding.py
index 9cd69988d..3264fb430 100644
--- a/tests/integration/test_request_forwarding.py
+++ b/tests/integration/test_request_forwarding.py
@@ -9,7 +9,11 @@
 from typing import AsyncGenerator
 from karapace.client import Client
 from tests.integration.utils.rest_client import RetryRestClient
-from tests.utils import new_random_name, repeat_until_master_is_available, repeat_until_successful_request
+from tests.utils import (
+    create_subject_name_factory,
+    repeat_until_master_is_available,
+    repeat_until_successful_request,
+)
 
 import asyncio
 import json
@@ -44,15 +48,17 @@ async def fixture_registry_async_client(
         await client.close()
 
 
+@pytest.mark.parametrize("subject", ["test_forwarding", "test_forw/arding"])
 async def test_schema_request_forwarding(
     registry_async_pair: list[str],
     request_forwarding_retry_client: RetryRestClient,
+    subject: str,
 ) -> None:
     master_url, slave_url = registry_async_pair
 
     max_tries, counter = 5, 0
     wait_time = 0.5
-    subject = new_random_name("subject")
+    subject = create_subject_name_factory(subject)()  # subject creation urlencodes
     schema = {"type": "string"}
     other_schema = {"type": "int"}
     # Config updates
diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py
index e7f325e47..9723d98e9 100644
--- a/tests/integration/test_schema.py
+++ b/tests/integration/test_schema.py
@@ -1553,8 +1553,9 @@ async def test_schema_subject_post_invalid(registry_async_client: Client) -> Non
     assert res.json()["message"] == [{"type": "missing", "loc": ["body", "schema"], "msg": "Field required", "input": {}}]
 
 
-async def test_schema_lifecycle(registry_async_client: Client) -> None:
-    subject = create_subject_name_factory("test_schema_lifecycle")()
+@pytest.mark.parametrize("subject", ["test_schema_lifecycle", "test_sche/ma_lifecycle"])
+async def test_schema_lifecycle(registry_async_client: Client, subject: str) -> None:
+    subject = create_subject_name_factory(subject)()  # subject creation urlencodes
     unique_field_factory = create_field_name_factory("unique_")
 
     unique_1 = unique_field_factory()
diff --git a/tests/utils.py b/tests/utils.py
index f578914f9..e9596f725 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -13,7 +13,7 @@
 from pathlib import Path
 from subprocess import Popen
 from typing import Any, IO
-from urllib.parse import quote
+from urllib.parse import quote_plus
 
 import asyncio
 import copy
@@ -252,7 +252,7 @@ def create_id_factory(prefix: str) -> Callable[[], str]:
 
     def create_name() -> str:
         nonlocal index
-        return new_random_name(f"{quote(prefix).replace('/', '_')}_{index}_")
+        return new_random_name(f"{quote_plus(prefix)}_{index}_")
 
     return create_name