From d3842684f3ec4c515cacf397de2b079c87248944 Mon Sep 17 00:00:00 2001 From: Nico Hinderling Date: Mon, 15 Dec 2025 10:04:05 -0800 Subject: [PATCH 1/4] Add ability to unzip artifacts zipped with zstd --- requirements.txt | 1 + .../artifacts/providers/zip_provider.py | 39 +++++++++++++++++-- .../artifacts/providers/test_zip_provider.py | 32 +++++++++++++++ 3 files changed, 69 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index da155d76..b61a51c4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,3 +20,4 @@ sentry-kafka-schemas==2.1.2 sentry-sdk>=2.36.0 sortedcontainers>=2.4.0 typing-extensions>=4.15.0 +zipfile-zstd==0.0.4 diff --git a/src/launchpad/artifacts/providers/zip_provider.py b/src/launchpad/artifacts/providers/zip_provider.py index 3ca73be2..0fc1b4aa 100644 --- a/src/launchpad/artifacts/providers/zip_provider.py +++ b/src/launchpad/artifacts/providers/zip_provider.py @@ -1,7 +1,7 @@ import zipfile from pathlib import Path -from typing import List +from typing import List, Set from launchpad.utils.file_utils import cleanup_directory, create_temp_directory from launchpad.utils.logging import get_logger @@ -11,6 +11,9 @@ DEFAULT_MAX_FILE_COUNT = 100000 DEFAULT_MAX_UNCOMPRESSED_SIZE = 10 * 1024 * 1024 * 1024 +# Compression method constants +COMPRESSION_ZSTD = 93 # Zstandard compression method + class UnreasonableZipError(ValueError): """Raised when a zip file exceeds reasonable limits.""" @@ -92,13 +95,43 @@ def extract_to_temp_directory(self) -> Path: self._temp_dirs.append(temp_dir) self._safe_extract(str(self.path), str(temp_dir)) - logger.debug(f"Extracted zip contents to {temp_dir} using system unzip") + logger.debug(f"Extracted zip contents to {temp_dir}") return temp_dir + def _detect_compression_methods(self, zip_path: str) -> Set[int]: + """Detect compression methods used in the zip file. + + Args: + zip_path: Path to the zip file + + Returns: + Set of compression method integers used in the zip file + """ + with zipfile.ZipFile(zip_path, "r") as zf: + return {info.compress_type for info in zf.infolist()} + def _safe_extract(self, zip_path: str, extract_path: str): - """Extract the zip contents to a temporary directory, ensuring that the paths are safe from path traversal attacks.""" + """Extract the zip contents to a temporary directory, ensuring that the paths are safe from path traversal attacks. + + Supports both standard compression methods and Zstandard compression. + """ base_dir = Path(extract_path) + + # Detect if zstandard compression is used + compression_methods = self._detect_compression_methods(zip_path) + uses_zstd = COMPRESSION_ZSTD in compression_methods + + if uses_zstd: + logger.debug("Detected Zstandard compression in zip file") + try: + import zipfile_zstd # noqa: F401 + except ImportError: + raise RuntimeError( + "Zstandard-compressed zip file detected, but zipfile-zstd package is not installed. " + "Install it with: pip install zipfile-zstd" + ) + with zipfile.ZipFile(zip_path, "r") as zip_ref: check_reasonable_zip(zip_ref) for member in zip_ref.namelist(): diff --git a/tests/unit/artifacts/providers/test_zip_provider.py b/tests/unit/artifacts/providers/test_zip_provider.py index dee1f9b8..25989315 100644 --- a/tests/unit/artifacts/providers/test_zip_provider.py +++ b/tests/unit/artifacts/providers/test_zip_provider.py @@ -6,6 +6,7 @@ import pytest from launchpad.artifacts.providers.zip_provider import ( + COMPRESSION_ZSTD, UnreasonableZipError, UnsafePathError, ZipProvider, @@ -88,6 +89,37 @@ def test_invalid_zip_file(self) -> None: with pytest.raises(zipfile.BadZipFile): provider.extract_to_temp_directory() + def test_detect_compression_methods(self, hackernews_xcarchive: Path) -> None: + provider = ZipProvider(hackernews_xcarchive) + methods = provider._detect_compression_methods(str(hackernews_xcarchive)) + + # Verify detection works and standard zips use deflate compression, not zstd + assert zipfile.ZIP_DEFLATED in methods + assert COMPRESSION_ZSTD not in methods + + def test_extract_zstd_zip(self) -> None: + """Test that zstd-compressed zips can be extracted when zipfile-zstd is available.""" + try: + import zipfile_zstd # noqa: F401 + except ImportError: + pytest.skip("zipfile-zstd not installed") + + with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as temp_file: + temp_path = Path(temp_file.name) + + with zipfile.ZipFile(temp_path, "w") as zf: + zf.writestr("test.txt", "content", compress_type=COMPRESSION_ZSTD) + + try: + provider = ZipProvider(temp_path) + temp_dir = provider.extract_to_temp_directory() + + assert temp_dir.exists() + assert (temp_dir / "test.txt").exists() + assert (temp_dir / "test.txt").read_text() == "content" + finally: + temp_path.unlink(missing_ok=True) + class TestIsSafePath: def test_valid_paths(self) -> None: From 7ffa8dcb5fdb6a22273a75915b7723bead8598d9 Mon Sep 17 00:00:00 2001 From: Nico Hinderling Date: Mon, 15 Dec 2025 10:32:40 -0800 Subject: [PATCH 2/4] simplify --- .../artifacts/providers/zip_provider.py | 34 ++----------- .../artifacts/providers/test_zip_provider.py | 51 +++++++------------ 2 files changed, 22 insertions(+), 63 deletions(-) diff --git a/src/launchpad/artifacts/providers/zip_provider.py b/src/launchpad/artifacts/providers/zip_provider.py index 0fc1b4aa..0381a86d 100644 --- a/src/launchpad/artifacts/providers/zip_provider.py +++ b/src/launchpad/artifacts/providers/zip_provider.py @@ -1,7 +1,9 @@ import zipfile from pathlib import Path -from typing import List, Set +from typing import List + +import zipfile_zstd # noqa: F401 - Import registers zstd handler with zipfile from launchpad.utils.file_utils import cleanup_directory, create_temp_directory from launchpad.utils.logging import get_logger @@ -11,9 +13,6 @@ DEFAULT_MAX_FILE_COUNT = 100000 DEFAULT_MAX_UNCOMPRESSED_SIZE = 10 * 1024 * 1024 * 1024 -# Compression method constants -COMPRESSION_ZSTD = 93 # Zstandard compression method - class UnreasonableZipError(ValueError): """Raised when a zip file exceeds reasonable limits.""" @@ -99,39 +98,12 @@ def extract_to_temp_directory(self) -> Path: return temp_dir - def _detect_compression_methods(self, zip_path: str) -> Set[int]: - """Detect compression methods used in the zip file. - - Args: - zip_path: Path to the zip file - - Returns: - Set of compression method integers used in the zip file - """ - with zipfile.ZipFile(zip_path, "r") as zf: - return {info.compress_type for info in zf.infolist()} - def _safe_extract(self, zip_path: str, extract_path: str): """Extract the zip contents to a temporary directory, ensuring that the paths are safe from path traversal attacks. Supports both standard compression methods and Zstandard compression. """ base_dir = Path(extract_path) - - # Detect if zstandard compression is used - compression_methods = self._detect_compression_methods(zip_path) - uses_zstd = COMPRESSION_ZSTD in compression_methods - - if uses_zstd: - logger.debug("Detected Zstandard compression in zip file") - try: - import zipfile_zstd # noqa: F401 - except ImportError: - raise RuntimeError( - "Zstandard-compressed zip file detected, but zipfile-zstd package is not installed. " - "Install it with: pip install zipfile-zstd" - ) - with zipfile.ZipFile(zip_path, "r") as zip_ref: check_reasonable_zip(zip_ref) for member in zip_ref.namelist(): diff --git a/tests/unit/artifacts/providers/test_zip_provider.py b/tests/unit/artifacts/providers/test_zip_provider.py index 25989315..5c4fbfff 100644 --- a/tests/unit/artifacts/providers/test_zip_provider.py +++ b/tests/unit/artifacts/providers/test_zip_provider.py @@ -6,7 +6,6 @@ import pytest from launchpad.artifacts.providers.zip_provider import ( - COMPRESSION_ZSTD, UnreasonableZipError, UnsafePathError, ZipProvider, @@ -89,37 +88,6 @@ def test_invalid_zip_file(self) -> None: with pytest.raises(zipfile.BadZipFile): provider.extract_to_temp_directory() - def test_detect_compression_methods(self, hackernews_xcarchive: Path) -> None: - provider = ZipProvider(hackernews_xcarchive) - methods = provider._detect_compression_methods(str(hackernews_xcarchive)) - - # Verify detection works and standard zips use deflate compression, not zstd - assert zipfile.ZIP_DEFLATED in methods - assert COMPRESSION_ZSTD not in methods - - def test_extract_zstd_zip(self) -> None: - """Test that zstd-compressed zips can be extracted when zipfile-zstd is available.""" - try: - import zipfile_zstd # noqa: F401 - except ImportError: - pytest.skip("zipfile-zstd not installed") - - with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as temp_file: - temp_path = Path(temp_file.name) - - with zipfile.ZipFile(temp_path, "w") as zf: - zf.writestr("test.txt", "content", compress_type=COMPRESSION_ZSTD) - - try: - provider = ZipProvider(temp_path) - temp_dir = provider.extract_to_temp_directory() - - assert temp_dir.exists() - assert (temp_dir / "test.txt").exists() - assert (temp_dir / "test.txt").read_text() == "content" - finally: - temp_path.unlink(missing_ok=True) - class TestIsSafePath: def test_valid_paths(self) -> None: @@ -157,3 +125,22 @@ def test_max_file_size(self, hackernews_xcarchive: Path) -> None: # iOS fixture is ~32MB uncompressed, so limit of 10MB should fail with pytest.raises(UnreasonableZipError, match="exceeding the limit of 10.0MB"): check_reasonable_zip(zf, max_uncompressed_size=10 * 1024 * 1024) + + def test_extract_zstd_zip(self) -> None: + """Test that zstd-compressed zips can be extracted.""" + with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as temp_file: + temp_path = Path(temp_file.name) + + # Create a zstd-compressed zip (compression method 93) + with zipfile.ZipFile(temp_path, "w") as zf: + zf.writestr("test.txt", "content", compress_type=93) + + try: + provider = ZipProvider(temp_path) + temp_dir = provider.extract_to_temp_directory() + + assert temp_dir.exists() + assert (temp_dir / "test.txt").exists() + assert (temp_dir / "test.txt").read_text() == "content" + finally: + temp_path.unlink(missing_ok=True) From 85447121e36e5bf5fd5f102879dc830be0a16978 Mon Sep 17 00:00:00 2001 From: Nico Hinderling Date: Mon, 15 Dec 2025 10:44:04 -0800 Subject: [PATCH 3/4] place monkeypatch in right spot --- src/launchpad/__init__.py | 5 ++++- src/launchpad/artifacts/providers/zip_provider.py | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/launchpad/__init__.py b/src/launchpad/__init__.py index f102a9ca..567650b1 100644 --- a/src/launchpad/__init__.py +++ b/src/launchpad/__init__.py @@ -1 +1,4 @@ -__version__ = "0.0.1" +# Monkey patches - import these first to register handlers globally +import zipfile_zstd # noqa: F401 - Registers zstd compression support with zipfile module + +__version__ = "0fg.0.1" diff --git a/src/launchpad/artifacts/providers/zip_provider.py b/src/launchpad/artifacts/providers/zip_provider.py index 0381a86d..8ae8d72a 100644 --- a/src/launchpad/artifacts/providers/zip_provider.py +++ b/src/launchpad/artifacts/providers/zip_provider.py @@ -3,8 +3,6 @@ from pathlib import Path from typing import List -import zipfile_zstd # noqa: F401 - Import registers zstd handler with zipfile - from launchpad.utils.file_utils import cleanup_directory, create_temp_directory from launchpad.utils.logging import get_logger From be4745d19aa9119802f7d0a247a87b3cf96f178c Mon Sep 17 00:00:00 2001 From: Nico Hinderling Date: Mon, 15 Dec 2025 13:37:39 -0800 Subject: [PATCH 4/4] comments --- src/launchpad/__init__.py | 4 ++-- tests/unit/artifacts/providers/test_zip_provider.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/launchpad/__init__.py b/src/launchpad/__init__.py index 567650b1..de83ada5 100644 --- a/src/launchpad/__init__.py +++ b/src/launchpad/__init__.py @@ -1,4 +1,4 @@ # Monkey patches - import these first to register handlers globally -import zipfile_zstd # noqa: F401 - Registers zstd compression support with zipfile module +import zipfile_zstd # noqa: F401 - Registers zstd compression support with zipfile module. Should not be required after upgrading to python 3.14 -__version__ = "0fg.0.1" +__version__ = "0.0.1" diff --git a/tests/unit/artifacts/providers/test_zip_provider.py b/tests/unit/artifacts/providers/test_zip_provider.py index 5c4fbfff..b39a3825 100644 --- a/tests/unit/artifacts/providers/test_zip_provider.py +++ b/tests/unit/artifacts/providers/test_zip_provider.py @@ -128,7 +128,7 @@ def test_max_file_size(self, hackernews_xcarchive: Path) -> None: def test_extract_zstd_zip(self) -> None: """Test that zstd-compressed zips can be extracted.""" - with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as temp_file: + with tempfile.NamedTemporaryFile(suffix=".zip") as temp_file: temp_path = Path(temp_file.name) # Create a zstd-compressed zip (compression method 93)