From bd954fce90be5606e7d05e35186f5bf51ba59cd1 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola <jarkko.jaakola@aiven.io> Date: Wed, 22 Jan 2025 09:14:59 +0200 Subject: [PATCH] feat: handle urlencoded forward slash in subject --- src/schema_registry/routers/config.py | 3 ++ src/schema_registry/routers/mode.py | 2 + .../routers/raw_path_router.py | 43 +++++++++++++++++++ src/schema_registry/routers/subjects.py | 2 + tests/integration/test_request_forwarding.py | 10 ++++- tests/integration/test_schema.py | 5 ++- tests/utils.py | 4 +- 7 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 src/schema_registry/routers/raw_path_router.py diff --git a/src/schema_registry/routers/config.py b/src/schema_registry/routers/config.py index 9d99c42ee..de151b8da 100644 --- a/src/schema_registry/routers/config.py +++ b/src/schema_registry/routers/config.py @@ -12,14 +12,17 @@ from schema_registry.controller import KarapaceSchemaRegistryController from schema_registry.registry import KarapaceSchemaRegistry from schema_registry.routers.errors import no_primary_url_error, unauthorized +from schema_registry.routers.raw_path_router import RawPathRoute from schema_registry.routers.requests import CompatibilityLevelResponse, CompatibilityRequest, CompatibilityResponse from schema_registry.user import get_current_user from typing import Annotated + config_router = APIRouter( prefix="/config", tags=["config"], responses={404: {"description": "Not found"}}, + route_class=RawPathRoute, ) diff --git a/src/schema_registry/routers/mode.py b/src/schema_registry/routers/mode.py index 4df141d3d..0c3c6d1e7 100644 --- a/src/schema_registry/routers/mode.py +++ b/src/schema_registry/routers/mode.py @@ -10,6 +10,7 @@ from schema_registry.container import SchemaRegistryContainer from schema_registry.controller import KarapaceSchemaRegistryController from schema_registry.routers.errors import unauthorized +from schema_registry.routers.raw_path_router import RawPathRoute from schema_registry.routers.requests import ModeResponse from schema_registry.user import get_current_user from typing import Annotated @@ -18,6 +19,7 @@ prefix="/mode", tags=["mode"], responses={404: {"description": "Not found"}}, + route_class=RawPathRoute, ) diff --git a/src/schema_registry/routers/raw_path_router.py b/src/schema_registry/routers/raw_path_router.py new file mode 100644 index 000000000..1b64cb8d9 --- /dev/null +++ b/src/schema_registry/routers/raw_path_router.py @@ -0,0 +1,43 @@ +""" +Copyright (c) 2025 Aiven Ltd +See LICENSE for details +""" + +""" +The MIT License (MIT) + +Copyright (c) 2018 Sebastián RamÃrez +""" + +from fastapi import HTTPException # noqa: E402 +from fastapi.routing import APIRoute # noqa: E402 +from starlette.routing import Match # noqa: E402 +from starlette.types import Scope # noqa: E402 + +import re # noqa: E402 + + +class RawPathRoute(APIRoute): + """The subject in the paths can contain url encoded forward slash + + See explanation and origin of the solution: + - https://github.com/fastapi/fastapi/discussions/7328#discussioncomment-8443865 + Starlette defect discussion: + - https://github.com/encode/starlette/issues/826 + """ + + def matches(self, scope: Scope) -> tuple[Match, Scope]: + raw_path: str | None = None + + if "raw_path" in scope and scope["raw_path"] is not None: + raw_path = scope["raw_path"].decode("utf-8") + + if raw_path is None: + raise HTTPException(status_code=500, detail="Internal routing error") + + # Drop the last forward slash if present. e.g. /path/ -> /path + raw_path = raw_path if raw_path[-1] != "/" else raw_path[:-1] + + new_path = re.sub(r"\?.*", "", raw_path) + scope["path"] = new_path + return super().matches(scope) diff --git a/src/schema_registry/routers/subjects.py b/src/schema_registry/routers/subjects.py index 80cf79fa2..a3fae0720 100644 --- a/src/schema_registry/routers/subjects.py +++ b/src/schema_registry/routers/subjects.py @@ -14,6 +14,7 @@ from schema_registry.controller import KarapaceSchemaRegistryController from schema_registry.registry import KarapaceSchemaRegistry from schema_registry.routers.errors import no_primary_url_error, unauthorized +from schema_registry.routers.raw_path_router import RawPathRoute from schema_registry.routers.requests import SchemaIdResponse, SchemaRequest, SchemaResponse, SubjectSchemaVersionResponse from schema_registry.user import get_current_user from typing import Annotated @@ -27,6 +28,7 @@ prefix="/subjects", tags=["subjects"], responses={404: {"description": "Not found"}}, + route_class=RawPathRoute, ) diff --git a/tests/integration/test_request_forwarding.py b/tests/integration/test_request_forwarding.py index 9cd69988d..3264fb430 100644 --- a/tests/integration/test_request_forwarding.py +++ b/tests/integration/test_request_forwarding.py @@ -9,7 +9,11 @@ from typing import AsyncGenerator from karapace.client import Client from tests.integration.utils.rest_client import RetryRestClient -from tests.utils import new_random_name, repeat_until_master_is_available, repeat_until_successful_request +from tests.utils import ( + create_subject_name_factory, + repeat_until_master_is_available, + repeat_until_successful_request, +) import asyncio import json @@ -44,15 +48,17 @@ async def fixture_registry_async_client( await client.close() +@pytest.mark.parametrize("subject", ["test_forwarding", "test_forw/arding"]) async def test_schema_request_forwarding( registry_async_pair: list[str], request_forwarding_retry_client: RetryRestClient, + subject: str, ) -> None: master_url, slave_url = registry_async_pair max_tries, counter = 5, 0 wait_time = 0.5 - subject = new_random_name("subject") + subject = create_subject_name_factory(subject)() # subject creation urlencodes schema = {"type": "string"} other_schema = {"type": "int"} # Config updates diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index e7f325e47..9723d98e9 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -1553,8 +1553,9 @@ async def test_schema_subject_post_invalid(registry_async_client: Client) -> Non assert res.json()["message"] == [{"type": "missing", "loc": ["body", "schema"], "msg": "Field required", "input": {}}] -async def test_schema_lifecycle(registry_async_client: Client) -> None: - subject = create_subject_name_factory("test_schema_lifecycle")() +@pytest.mark.parametrize("subject", ["test_schema_lifecycle", "test_sche/ma_lifecycle"]) +async def test_schema_lifecycle(registry_async_client: Client, subject: str) -> None: + subject = create_subject_name_factory(subject)() # subject creation urlencodes unique_field_factory = create_field_name_factory("unique_") unique_1 = unique_field_factory() diff --git a/tests/utils.py b/tests/utils.py index f578914f9..e9596f725 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -13,7 +13,7 @@ from pathlib import Path from subprocess import Popen from typing import Any, IO -from urllib.parse import quote +from urllib.parse import quote_plus import asyncio import copy @@ -252,7 +252,7 @@ def create_id_factory(prefix: str) -> Callable[[], str]: def create_name() -> str: nonlocal index - return new_random_name(f"{quote(prefix).replace('/', '_')}_{index}_") + return new_random_name(f"{quote_plus(prefix)}_{index}_") return create_name