diff --git a/changes/3657.misc.md b/changes/3657.misc.md new file mode 100644 index 0000000000..1411704674 --- /dev/null +++ b/changes/3657.misc.md @@ -0,0 +1 @@ +Fix obstore _transform_list_dir implementation to correctly relativize paths (removing lstrip usage). \ No newline at end of file diff --git a/src/zarr/storage/_obstore.py b/src/zarr/storage/_obstore.py index 5c2197ecf6..6e4011da59 100644 --- a/src/zarr/storage/_obstore.py +++ b/src/zarr/storage/_obstore.py @@ -4,6 +4,8 @@ import contextlib import pickle from collections import defaultdict +from itertools import chain +from operator import itemgetter from typing import TYPE_CHECKING, Generic, Self, TypedDict, TypeVar from zarr.abc.store import ( @@ -15,6 +17,7 @@ ) from zarr.core.common import concurrent_map from zarr.core.config import config +from zarr.storage._utils import _relativize_path if TYPE_CHECKING: from collections.abc import AsyncGenerator, Coroutine, Iterable, Sequence @@ -263,10 +266,11 @@ async def _transform_list_dir( # We assume that the underlying object-store implementation correctly handles the # prefix, so we don't double-check that the returned results actually start with the # given prefix. - prefixes = [obj.lstrip(prefix).lstrip("/") for obj in list_result["common_prefixes"]] - objects = [obj["path"].removeprefix(prefix).lstrip("/") for obj in list_result["objects"]] - for item in prefixes + objects: - yield item + prefix = prefix.rstrip("/") + for path in chain( + list_result["common_prefixes"], map(itemgetter("path"), list_result["objects"]) + ): + yield _relativize_path(path=path, prefix=prefix) class _BoundedRequest(TypedDict): diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index ad3b80da41..454a58af9e 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -491,24 +491,36 @@ async def test_list_empty_path(self, store: S) -> None: assert observed_prefix_sorted == expected_prefix_sorted async def test_list_dir(self, store: S) -> None: - root = "foo" - store_dict = { - root + "/zarr.json": self.buffer_cls.from_bytes(b"bar"), - root + "/c/1": self.buffer_cls.from_bytes(b"\x01"), - } + roots_and_keys: list[tuple[str, dict[str, Buffer]]] = [ + ( + "foo", + { + "foo/zarr.json": self.buffer_cls.from_bytes(b"bar"), + "foo/c/1": self.buffer_cls.from_bytes(b"\x01"), + }, + ), + ( + "foo/bar", + { + "foo/bar/foobar_first_child": self.buffer_cls.from_bytes(b"1"), + "foo/bar/foobar_second_child/zarr.json": self.buffer_cls.from_bytes(b"2"), + }, + ), + ] assert await _collect_aiterator(store.list_dir("")) == () - assert await _collect_aiterator(store.list_dir(root)) == () - await store._set_many(store_dict.items()) + for root, store_dict in roots_and_keys: + assert await _collect_aiterator(store.list_dir(root)) == () - keys_observed = await _collect_aiterator(store.list_dir(root)) - keys_expected = {k.removeprefix(root + "/").split("/")[0] for k in store_dict} + await store._set_many(store_dict.items()) - assert sorted(keys_observed) == sorted(keys_expected) + keys_observed = await _collect_aiterator(store.list_dir(root)) + keys_expected = {k.removeprefix(root + "/").split("/")[0] for k in store_dict} + assert sorted(keys_observed) == sorted(keys_expected) - keys_observed = await _collect_aiterator(store.list_dir(root + "/")) - assert sorted(keys_expected) == sorted(keys_observed) + keys_observed = await _collect_aiterator(store.list_dir(root + "/")) + assert sorted(keys_expected) == sorted(keys_observed) async def test_set_if_not_exists(self, store: S) -> None: key = "k"