From 48ee1ce4fa2d06cf58339fd41ddcf7f662c705df Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Fri, 12 Dec 2025 10:39:02 -0800 Subject: [PATCH 1/6] Make REST catalog namespace separator configurable The REST spec currently uses %1F as the UTF-8 encoded namespace separator for multi-part namespaces. --- pyiceberg/catalog/rest/__init__.py | 44 +++++++++++++++++++++++------- tests/catalog/test_rest.py | 27 ++++++++++++++++++ 2 files changed, 61 insertions(+), 10 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 7866f5e8bd..8a4bd753e5 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -20,6 +20,7 @@ Any, Union, ) +from urllib.parse import quote, unquote from pydantic import ConfigDict, Field, field_validator from requests import HTTPError, Session @@ -227,7 +228,8 @@ class IdentifierKind(Enum): VIEW_ENDPOINTS_SUPPORTED = "view-endpoints-supported" VIEW_ENDPOINTS_SUPPORTED_DEFAULT = False -NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8) +NAMESPACE_SEPARATOR_PROPERTY = "namespace-separator" +DEFAULT_NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8) def _retry_hook(retry_state: RetryCallState) -> None: @@ -319,6 +321,7 @@ class RestCatalog(Catalog): uri: str _session: Session _supported_endpoints: set[Endpoint] + _namespace_separator: str def __init__(self, name: str, **properties: str): """Rest Catalog. @@ -333,6 +336,8 @@ def __init__(self, name: str, **properties: str): self.uri = properties[URI] self._fetch_config() self._session = self._create_session() + separator_from_properties = self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR) + self._namespace_separator = unquote(separator_from_properties) def _create_session(self) -> Session: """Create a request session with provided catalog configuration.""" @@ -478,6 +483,16 @@ def _extract_optional_oauth_params(self) -> dict[str, str]: return optional_oauth_param + def _encode_namespace_path(self, namespace: Identifier) -> str: + """ + Encode a namespace for use as a path parameter in a URL. + + Each part of the namespace is URL-encoded using `urllib.parse.quote` + (ensuring characters like '/' are encoded) and then joined by the + configured namespace separator. + """ + return self._namespace_separator.join(quote(part, safe="") for part in namespace) + def _fetch_config(self) -> None: params = {} if warehouse_location := self.properties.get(WAREHOUSE_LOCATION): @@ -519,11 +534,19 @@ def _identifier_to_validated_tuple(self, identifier: str | Identifier) -> Identi def _split_identifier_for_path( self, identifier: str | Identifier | TableIdentifier, kind: IdentifierKind = IdentifierKind.TABLE ) -> Properties: + from urllib.parse import quote + if isinstance(identifier, TableIdentifier): - return {"namespace": NAMESPACE_SEPARATOR.join(identifier.namespace.root), kind.value: identifier.name} + return { + "namespace": self._encode_namespace_path(tuple(identifier.namespace.root)), + kind.value: quote(identifier.name, safe=""), + } identifier_tuple = self._identifier_to_validated_tuple(identifier) - return {"namespace": NAMESPACE_SEPARATOR.join(identifier_tuple[:-1]), kind.value: identifier_tuple[-1]} + return { + "namespace": self._encode_namespace_path(identifier_tuple[:-1]), + kind.value: quote(identifier_tuple[-1], safe=""), + } def _split_identifier_for_json(self, identifier: str | Identifier) -> dict[str, Identifier | str]: identifier_tuple = self._identifier_to_validated_tuple(identifier) @@ -741,7 +764,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) - def list_tables(self, namespace: str | Identifier) -> list[Identifier]: self._check_endpoint(Capability.V1_LIST_TABLES) namespace_tuple = self._check_valid_namespace_identifier(namespace) - namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple) + namespace_concat = self._encode_namespace_path(namespace_tuple) response = self._session.get(self.url(Endpoints.list_tables, namespace=namespace_concat)) try: response.raise_for_status() @@ -827,7 +850,7 @@ def list_views(self, namespace: str | Identifier) -> list[Identifier]: if Capability.V1_LIST_VIEWS not in self._supported_endpoints: return [] namespace_tuple = self._check_valid_namespace_identifier(namespace) - namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple) + namespace_concat = self._encode_namespace_path(namespace_tuple) response = self._session.get(self.url(Endpoints.list_views, namespace=namespace_concat)) try: response.raise_for_status() @@ -897,7 +920,7 @@ def create_namespace(self, namespace: str | Identifier, properties: Properties = def drop_namespace(self, namespace: str | Identifier) -> None: self._check_endpoint(Capability.V1_DELETE_NAMESPACE) namespace_tuple = self._check_valid_namespace_identifier(namespace) - namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) + namespace = self._encode_namespace_path(namespace_tuple) response = self._session.delete(self.url(Endpoints.drop_namespace, namespace=namespace)) try: response.raise_for_status() @@ -910,7 +933,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: namespace_tuple = self.identifier_to_tuple(namespace) response = self._session.get( self.url( - f"{Endpoints.list_namespaces}?parent={NAMESPACE_SEPARATOR.join(namespace_tuple)}" + f"{Endpoints.list_namespaces}?parent={self._namespace_separator.join(namespace_tuple)}" if namespace_tuple else Endpoints.list_namespaces ), @@ -926,7 +949,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: def load_namespace_properties(self, namespace: str | Identifier) -> Properties: self._check_endpoint(Capability.V1_LOAD_NAMESPACE) namespace_tuple = self._check_valid_namespace_identifier(namespace) - namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) + namespace = self._encode_namespace_path(namespace_tuple) response = self._session.get(self.url(Endpoints.load_namespace_metadata, namespace=namespace)) try: response.raise_for_status() @@ -941,7 +964,7 @@ def update_namespace_properties( ) -> PropertiesUpdateSummary: self._check_endpoint(Capability.V1_UPDATE_NAMESPACE) namespace_tuple = self._check_valid_namespace_identifier(namespace) - namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) + namespace = self._encode_namespace_path(namespace_tuple) payload = {"removals": list(removals or []), "updates": updates} response = self._session.post(self.url(Endpoints.update_namespace_properties, namespace=namespace), json=payload) try: @@ -958,7 +981,8 @@ def update_namespace_properties( @retry(**_RETRY_ARGS) def namespace_exists(self, namespace: str | Identifier) -> bool: namespace_tuple = self._check_valid_namespace_identifier(namespace) - namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) + namespace = self._encode_namespace_path(namespace_tuple) + # fallback in order to work with older rest catalog implementations if Capability.V1_NAMESPACE_EXISTS not in self._supported_endpoints: try: diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 5aae59497b..b67c37bcdf 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1921,6 +1921,33 @@ def test_rest_catalog_with_google_credentials_path( actual_headers = history[0].headers assert actual_headers["Authorization"] == expected_auth_header + assert actual_headers["Authorization"] == expected_auth_header + + +def test_custom_namespace_separator(rest_mock: Mocker) -> None: + custom_separator = "-" + namespace_part1 = "some" + namespace_part2 = "namespace" + # The expected URL path segment should use the literal custom_separator + expected_url_path_segment = f"{namespace_part1}{custom_separator}{namespace_part2}" + + rest_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + rest_mock.get( + f"{TEST_URI}v1/namespaces/{expected_url_path_segment}", + json={"namespace": [namespace_part1, namespace_part2], "properties": {"prop": "yes"}}, + status_code=204, + request_headers=TEST_HEADERS, + ) + + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{"namespace-separator": custom_separator}) + catalog.load_namespace_properties((namespace_part1, namespace_part2)) + + assert rest_mock.last_request.url == f"{TEST_URI}v1/namespaces/{expected_url_path_segment}" + @pytest.mark.filterwarnings( "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" From d3bfb41646dfa182a76bb409ba044f2e63d5dd7b Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 15 Dec 2025 10:36:40 -0800 Subject: [PATCH 2/6] PR comments --- pyiceberg/catalog/rest/__init__.py | 15 +++------------ tests/catalog/test_rest.py | 4 +--- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 8a4bd753e5..7d18f8a1cb 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -320,7 +320,6 @@ class ListViewsResponse(IcebergBaseModel): class RestCatalog(Catalog): uri: str _session: Session - _supported_endpoints: set[Endpoint] _namespace_separator: str def __init__(self, name: str, **properties: str): @@ -337,6 +336,8 @@ def __init__(self, name: str, **properties: str): self._fetch_config() self._session = self._create_session() separator_from_properties = self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR) + if not separator_from_properties: + raise ValueError("Namespace separator cannot be an empty string") self._namespace_separator = unquote(separator_from_properties) def _create_session(self) -> Session: @@ -534,8 +535,6 @@ def _identifier_to_validated_tuple(self, identifier: str | Identifier) -> Identi def _split_identifier_for_path( self, identifier: str | Identifier | TableIdentifier, kind: IdentifierKind = IdentifierKind.TABLE ) -> Properties: - from urllib.parse import quote - if isinstance(identifier, TableIdentifier): return { "namespace": self._encode_namespace_path(tuple(identifier.namespace.root)), @@ -933,7 +932,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: namespace_tuple = self.identifier_to_tuple(namespace) response = self._session.get( self.url( - f"{Endpoints.list_namespaces}?parent={self._namespace_separator.join(namespace_tuple)}" + f"{Endpoints.list_namespaces}?parent={self._encode_namespace_path(namespace_tuple)}" if namespace_tuple else Endpoints.list_namespaces ), @@ -983,14 +982,6 @@ def namespace_exists(self, namespace: str | Identifier) -> bool: namespace_tuple = self._check_valid_namespace_identifier(namespace) namespace = self._encode_namespace_path(namespace_tuple) - # fallback in order to work with older rest catalog implementations - if Capability.V1_NAMESPACE_EXISTS not in self._supported_endpoints: - try: - self.load_namespace_properties(namespace_tuple) - return True - except NoSuchNamespaceError: - return False - response = self._session.head(self.url(Endpoints.namespace_exists, namespace=namespace)) if response.status_code == 404: diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index b67c37bcdf..2c19e42a80 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1921,8 +1921,6 @@ def test_rest_catalog_with_google_credentials_path( actual_headers = history[0].headers assert actual_headers["Authorization"] == expected_auth_header - assert actual_headers["Authorization"] == expected_auth_header - def test_custom_namespace_separator(rest_mock: Mocker) -> None: custom_separator = "-" @@ -1939,7 +1937,7 @@ def test_custom_namespace_separator(rest_mock: Mocker) -> None: rest_mock.get( f"{TEST_URI}v1/namespaces/{expected_url_path_segment}", json={"namespace": [namespace_part1, namespace_part2], "properties": {"prop": "yes"}}, - status_code=204, + status_code=200, request_headers=TEST_HEADERS, ) From 72f0ade7458110f6e6e6833951880f80423c95d6 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 5 Jan 2026 14:34:42 -0800 Subject: [PATCH 3/6] Test fixes from PR --- pyiceberg/catalog/rest/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 7d18f8a1cb..74bef9c505 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -335,10 +335,6 @@ def __init__(self, name: str, **properties: str): self.uri = properties[URI] self._fetch_config() self._session = self._create_session() - separator_from_properties = self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR) - if not separator_from_properties: - raise ValueError("Namespace separator cannot be an empty string") - self._namespace_separator = unquote(separator_from_properties) def _create_session(self) -> Session: """Create a request session with provided catalog configuration.""" @@ -526,6 +522,11 @@ def _fetch_config(self) -> None: if property_as_bool(self.properties, VIEW_ENDPOINTS_SUPPORTED, VIEW_ENDPOINTS_SUPPORTED_DEFAULT): self._supported_endpoints.update(VIEW_ENDPOINTS) + separator_from_properties = self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR) + if not separator_from_properties: + raise ValueError("Namespace separator cannot be an empty string") + self._namespace_separator = unquote(separator_from_properties) + def _identifier_to_validated_tuple(self, identifier: str | Identifier) -> Identifier: identifier_tuple = self.identifier_to_tuple(identifier) if len(identifier_tuple) <= 1: @@ -542,6 +543,7 @@ def _split_identifier_for_path( } identifier_tuple = self._identifier_to_validated_tuple(identifier) + # Use quote to ensure that '/' aren't treated as path separators. return { "namespace": self._encode_namespace_path(identifier_tuple[:-1]), kind.value: quote(identifier_tuple[-1], safe=""), From 2c616fcd794713fce4dccaf1d078d9342dfad376 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 5 Jan 2026 15:05:41 -0800 Subject: [PATCH 4/6] add test --- tests/integration/test_catalog.py | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 0c77666568..6cb957c379 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -16,6 +16,7 @@ # under the License. import os +import uuid from collections.abc import Generator from pathlib import Path, PosixPath @@ -601,3 +602,36 @@ def test_register_table_existing(test_catalog: Catalog, table_schema_nested: Sch # Assert that registering the table again raises TableAlreadyExistsError with pytest.raises(TableAlreadyExistsError): test_catalog.register_table(identifier, metadata_location=table.metadata_location) + + +@pytest.mark.integration +def test_rest_custom_namespace_separator(rest_catalog: Catalog, table_schema_simple: Schema): + """ + Tests that the REST catalog correctly picks up the namespace-separator from the config endpoint. + The REST Catalog is configured with a '.' namespace separator. + """ + assert rest_catalog._namespace_separator == "." + + unique_id = uuid.uuid4().hex + parent_namespace = (f"test_parent_{unique_id}",) + child_namespace_part = "child" + full_namespace_tuple = (*parent_namespace, child_namespace_part) + + table_name = "my_table" + full_table_identifier_tuple = (*full_namespace_tuple, table_name) + + rest_catalog.create_namespace(namespace=parent_namespace) + rest_catalog.create_namespace(namespace=full_namespace_tuple) + + namespaces = rest_catalog.list_namespaces(parent_namespace) + assert full_namespace_tuple in namespaces + + # Test with a table + table = rest_catalog.create_table(identifier=full_table_identifier_tuple, schema=table_schema_simple) + assert table.identifier == full_table_identifier_tuple + + tables = rest_catalog.list_tables(full_namespace_tuple) + assert full_table_identifier_tuple in tables + + loaded_table = rest_catalog.load_table(identifier=full_table_identifier_tuple) + assert loaded_table.identifier == full_table_identifier_tuple From 1559c2683983b07f4a836bce734135776d9a544a Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Thu, 8 Jan 2026 12:06:25 -0800 Subject: [PATCH 5/6] test fix --- tests/integration/test_catalog.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 6cb957c379..717f51e605 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -628,10 +628,10 @@ def test_rest_custom_namespace_separator(rest_catalog: Catalog, table_schema_sim # Test with a table table = rest_catalog.create_table(identifier=full_table_identifier_tuple, schema=table_schema_simple) - assert table.identifier == full_table_identifier_tuple + assert table.name() == full_table_identifier_tuple tables = rest_catalog.list_tables(full_namespace_tuple) assert full_table_identifier_tuple in tables loaded_table = rest_catalog.load_table(identifier=full_table_identifier_tuple) - assert loaded_table.identifier == full_table_identifier_tuple + assert loaded_table.name() == full_table_identifier_tuple From 56e93b9eeec88298b5cac448496adf2206b8b1c7 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Thu, 8 Jan 2026 12:12:24 -0800 Subject: [PATCH 6/6] lint fixes --- tests/integration/test_catalog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 717f51e605..fb7026256c 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -605,7 +605,7 @@ def test_register_table_existing(test_catalog: Catalog, table_schema_nested: Sch @pytest.mark.integration -def test_rest_custom_namespace_separator(rest_catalog: Catalog, table_schema_simple: Schema): +def test_rest_custom_namespace_separator(rest_catalog: RestCatalog, table_schema_simple: Schema) -> None: """ Tests that the REST catalog correctly picks up the namespace-separator from the config endpoint. The REST Catalog is configured with a '.' namespace separator.