From 6d88a155c3fded7a63fafad85e08331fcbaa66f1 Mon Sep 17 00:00:00 2001 From: Chandra Date: Thu, 15 Jan 2026 15:58:15 +0000 Subject: [PATCH 1/6] Add writes benchmarking --- tests/perf/microbenchmarks/writes/config.yaml | 34 ++ .../microbenchmarks/writes/test_writes.py | 311 ++++++++++++++++++ 2 files changed, 345 insertions(+) create mode 100644 tests/perf/microbenchmarks/writes/config.yaml create mode 100644 tests/perf/microbenchmarks/writes/test_writes.py diff --git a/tests/perf/microbenchmarks/writes/config.yaml b/tests/perf/microbenchmarks/writes/config.yaml new file mode 100644 index 000000000..b4d93ba52 --- /dev/null +++ b/tests/perf/microbenchmarks/writes/config.yaml @@ -0,0 +1,34 @@ +common: + bucket_types: + - "regional" + - "zonal" + file_sizes_mib: + - 1024 # 1GiB + chunk_sizes_mib: [100] + rounds: 10 + +workload: + + ############# single proc single coroutines ######### + - name: "write_seq" + pattern: "seq" + coros: [1] + processes: [1] + + ############# single proc multiple coroutines ######### + + - name: "write_seq_multi_coros" + pattern: "seq" + coros: [2, 4, 8, 16] + processes: [1] + + ############# multiple proc multiple coroutines ######### + - name: "write_seq_multi_process" + pattern: "seq" + coros: [1, 2] + processes: [8, 16, 32, 64] + + +defaults: + DEFAULT_RAPID_ZONAL_BUCKET: "chandrasiri-benchmarks-zb" + DEFAULT_STANDARD_BUCKET: "chandrasiri-benchmarks-rb" diff --git a/tests/perf/microbenchmarks/writes/test_writes.py b/tests/perf/microbenchmarks/writes/test_writes.py new file mode 100644 index 000000000..61dc354db --- /dev/null +++ b/tests/perf/microbenchmarks/writes/test_writes.py @@ -0,0 +1,311 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Docstring for tests.perf.microbenchmarks.test_writes + +File for benchmarking zonal writes (i.e. uploads) +""" + +import os +import time +import asyncio +from concurrent.futures import ThreadPoolExecutor +import multiprocessing +import logging + +import pytest +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter + +from tests.perf.microbenchmarks._utils import publish_benchmark_extra_info, RandomBytesIO +from tests.perf.microbenchmarks.conftest import publish_resource_metrics +import tests.perf.microbenchmarks.config as config + +# Get write parameters +all_params = config.get_write_params() + +async def create_client(): + """Initializes async client and gets the current event loop.""" + return AsyncGrpcClient().grpc_client + +async def upload_chunks_using_grpc_async(client, filename, other_params): + start_time = time.monotonic_ns() + + writer = AsyncAppendableObjectWriter( + client=client, bucket_name=other_params.bucket_name, object_name=filename + ) + await writer.open() + + uploaded_bytes = 0 + upload_size = other_params.file_size_bytes + chunk_size = other_params.chunk_size_bytes + + while uploaded_bytes < upload_size: + bytes_to_upload = min(chunk_size, upload_size - uploaded_bytes) + data = os.urandom(bytes_to_upload) + await writer.append(data) + uploaded_bytes += bytes_to_upload + await writer.close() + + assert uploaded_bytes == upload_size + + end_time = time.monotonic_ns() + elapsed_time = end_time - start_time + return elapsed_time / 1_000_000_000 + +def upload_chunks_using_grpc(loop, client, filename, other_params): + return loop.run_until_complete( + upload_chunks_using_grpc_async(client, filename, other_params) + ) + +def upload_using_json(_, json_client, filename, other_params): + start_time = time.monotonic_ns() + + bucket = json_client.bucket(other_params.bucket_name) + blob = bucket.blob(filename) + upload_size = other_params.file_size_bytes + # Don't use BytesIO because it'll report high memory usage for large files. + # `RandomBytesIO` generates random bytes on the fly. + in_mem_file = RandomBytesIO(upload_size) + # data = os.urandom(upload_size) + blob.upload_from_file(in_mem_file) + + end_time = time.monotonic_ns() + elapsed_time = end_time - start_time + return elapsed_time / 1_000_000_000 + +@pytest.mark.parametrize( + "workload_params", + all_params["write_seq"], + indirect=True, + ids=lambda p: p.name, +) +def test_uploads_single_proc_single_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + params, files_names = workload_params + + if params.bucket_type == "zonal": + logging.info("bucket type zonal") + target_func = upload_chunks_using_grpc + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + else: + logging.info("bucket type regional") + target_func = upload_using_json + loop = None + client = storage_client + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = target_func(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + loop, + client, + files_names[0], + params, + ), + ) + finally: + if loop is not None: + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + publish_benchmark_extra_info(benchmark, params, benchmark_group="write", true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) + +def upload_files_using_grpc_multi_coro(loop, client, files, other_params): + async def main(): + tasks = [] + for f in files: + tasks.append( + upload_chunks_using_grpc_async(client, f, other_params) + ) + return await asyncio.gather(*tasks) + + results = loop.run_until_complete(main()) + return max(results) + +def upload_files_using_json_multi_threaded(_, json_client, files, other_params): + results = [] + with ThreadPoolExecutor(max_workers=other_params.num_coros) as executor: + futures = [] + for f in files: + future = executor.submit( + upload_using_json, None, json_client, f, other_params + ) + futures.append(future) + + for future in futures: + results.append(future.result()) + + return max(results) + +@pytest.mark.parametrize( + "workload_params", + all_params["write_seq_multi_coros"], + indirect=True, + ids=lambda p: p.name, +) +def test_uploads_single_proc_multi_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + params, files_names = workload_params + + if params.bucket_type == "zonal": + logging.info("bucket type zonal") + target_func = upload_files_using_grpc_multi_coro + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + else: + logging.info("bucket type regional") + target_func = upload_files_using_json_multi_threaded + loop = None + client = storage_client + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = target_func(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + loop, + client, + files_names, + params, + ), + ) + finally: + if loop is not None: + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + publish_benchmark_extra_info(benchmark, params, benchmark_group="write", true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) + +def _upload_files_worker(files_to_upload, other_params, bucket_type): + if bucket_type == "zonal": + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + try: + result = upload_files_using_grpc_multi_coro( + loop, client, files_to_upload, other_params + ) + finally: + # cleanup loop + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + return result + else: # regional + from google.cloud import storage + json_client = storage.Client() + return upload_files_using_json_multi_threaded( + None, json_client, files_to_upload, other_params + ) + +def upload_files_mp_mc_wrapper(files_names, params): + num_processes = params.num_processes + num_coros = params.num_coros + + filenames_per_process = [ + files_names[i : i + num_coros] for i in range(0, len(files_names), num_coros) + ] + + args = [ + ( + filenames, + params, + params.bucket_type, + ) + for filenames in filenames_per_process + ] + + ctx = multiprocessing.get_context("spawn") + with ctx.Pool(processes=num_processes) as pool: + results = pool.starmap(_upload_files_worker, args) + + return max(results) + +@pytest.mark.parametrize( + "workload_params", + all_params["write_seq_multi_process"], + indirect=True, + ids=lambda p: p.name, +) +def test_uploads_multi_proc_multi_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + params, files_names = workload_params + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = upload_files_mp_mc_wrapper(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + files_names, + params, + ), + ) + finally: + publish_benchmark_extra_info(benchmark, params, benchmark_group="write", true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) \ No newline at end of file From 3f76aab01248133f36bb13c834f00a2aa23ff961 Mon Sep 17 00:00:00 2001 From: Chandra Date: Thu, 15 Jan 2026 16:46:22 +0000 Subject: [PATCH 2/6] fix: update config file path in write parameters and adjust import statement in test --- tests/perf/microbenchmarks/writes/config.py | 2 +- tests/perf/microbenchmarks/writes/test_writes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/perf/microbenchmarks/writes/config.py b/tests/perf/microbenchmarks/writes/config.py index 1870a14d4..1537c6668 100644 --- a/tests/perf/microbenchmarks/writes/config.py +++ b/tests/perf/microbenchmarks/writes/config.py @@ -33,7 +33,7 @@ def get_write_params() -> Dict[str, List[WriteParameters]]: you may use itertools.product """ params: Dict[str, List[WriteParameters]] = {} - config_path = os.path.join(os.path.dirname(__file__), "config_writes.yaml") + config_path = os.path.join(os.path.dirname(__file__), "config.yaml") with open(config_path, "r") as f: config = yaml.safe_load(f) diff --git a/tests/perf/microbenchmarks/writes/test_writes.py b/tests/perf/microbenchmarks/writes/test_writes.py index 61dc354db..b506763f0 100644 --- a/tests/perf/microbenchmarks/writes/test_writes.py +++ b/tests/perf/microbenchmarks/writes/test_writes.py @@ -30,7 +30,7 @@ from tests.perf.microbenchmarks._utils import publish_benchmark_extra_info, RandomBytesIO from tests.perf.microbenchmarks.conftest import publish_resource_metrics -import tests.perf.microbenchmarks.config as config +import tests.perf.microbenchmarks.writes.config as config # Get write parameters all_params = config.get_write_params() From 0a9d18505ed06f45671e359e4fdacd88e2c4ba36 Mon Sep 17 00:00:00 2001 From: Chandra Date: Thu, 15 Jan 2026 18:06:04 +0000 Subject: [PATCH 3/6] fix: update bucket_map to use environment variables for default bucket values --- tests/perf/microbenchmarks/writes/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/perf/microbenchmarks/writes/config.py b/tests/perf/microbenchmarks/writes/config.py index 7d328ec3d..7f5f6f1f3 100644 --- a/tests/perf/microbenchmarks/writes/config.py +++ b/tests/perf/microbenchmarks/writes/config.py @@ -44,8 +44,8 @@ def get_write_params() -> Dict[str, List[WriteParameters]]: rounds = common_params["rounds"] bucket_map = { - "zonal": config["defaults"]["DEFAULT_RAPID_ZONAL_BUCKET"], - "regional": config["defaults"]["DEFAULT_STANDARD_BUCKET"], + "zonal": os.environ.get("DEFAULT_RAPID_ZONAL_BUCKET", config['defaults']['DEFAULT_RAPID_ZONAL_BUCKET']), + "regional": os.environ.get("DEFAULT_STANDARD_BUCKET", config['defaults']['DEFAULT_STANDARD_BUCKET']) } for workload in config["workload"]: From 7c7808ec9ca5dfba4e46dd0ea83f346d48b6028c Mon Sep 17 00:00:00 2001 From: Chandra Date: Thu, 15 Jan 2026 19:12:59 +0000 Subject: [PATCH 4/6] update README and test_writes.py with detailed usage examples and enhanced docstrings --- tests/perf/microbenchmarks/README.md | 5 +- .../microbenchmarks/writes/test_writes.py | 110 +++++++++++++++++- 2 files changed, 110 insertions(+), 5 deletions(-) diff --git a/tests/perf/microbenchmarks/README.md b/tests/perf/microbenchmarks/README.md index 0219a5bd9..a3e045682 100644 --- a/tests/perf/microbenchmarks/README.md +++ b/tests/perf/microbenchmarks/README.md @@ -15,10 +15,13 @@ pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/reads/test To run a single test, append `::` followed by the test name to the file path. -Example: +Examples: ```bash pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/reads/test_reads.py::test_downloads_single_proc_single_coro ``` +```bash +pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/writes/test_writes.py::test_uploads_single_proc_single_coro +``` ## Configuration diff --git a/tests/perf/microbenchmarks/writes/test_writes.py b/tests/perf/microbenchmarks/writes/test_writes.py index b506763f0..ded797ad0 100644 --- a/tests/perf/microbenchmarks/writes/test_writes.py +++ b/tests/perf/microbenchmarks/writes/test_writes.py @@ -11,10 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Docstring for tests.perf.microbenchmarks.test_writes +"""Microbenchmarks for Google Cloud Storage write operations. + +This module contains performance benchmarks for various write patterns to Google Cloud Storage. +It includes three main test functions: +- `test_uploads_single_proc_single_coro`: Benchmarks uploads using a single process and a single coroutine. +- `test_uploads_single_proc_multi_coro`: Benchmarks uploads using a single process and multiple coroutines. +- `test_uploads_multi_proc_multi_coro`: Benchmarks uploads using multiple processes and multiple coroutines. -File for benchmarking zonal writes (i.e. uploads) +All other functions in this module are helper methods for these three tests. """ import os @@ -40,6 +45,17 @@ async def create_client(): return AsyncGrpcClient().grpc_client async def upload_chunks_using_grpc_async(client, filename, other_params): + """Uploads a file in chunks using the gRPC API asynchronously. + + Args: + client: The async gRPC client. + filename (str): The name of the object to create. + other_params: An object containing benchmark parameters like bucket_name, + file_size_bytes, and chunk_size_bytes. + + Returns: + float: The total time taken for the upload in seconds. + """ start_time = time.monotonic_ns() writer = AsyncAppendableObjectWriter( @@ -65,11 +81,34 @@ async def upload_chunks_using_grpc_async(client, filename, other_params): return elapsed_time / 1_000_000_000 def upload_chunks_using_grpc(loop, client, filename, other_params): + """Wrapper to run the async gRPC upload in a synchronous context. + + Args: + loop: The asyncio event loop. + client: The async gRPC client. + filename (str): The name of the object to create. + other_params: An object containing benchmark parameters. + + Returns: + float: The total time taken for the upload in seconds. + """ return loop.run_until_complete( upload_chunks_using_grpc_async(client, filename, other_params) ) def upload_using_json(_, json_client, filename, other_params): + """Uploads a file using the JSON API. + + Args: + _ (any): Unused. + json_client: The standard Python Storage client. + filename (str): The name of the object to create. + other_params: An object containing benchmark parameters like bucket_name + and file_size_bytes. + + Returns: + float: The total time taken for the upload in seconds. + """ start_time = time.monotonic_ns() bucket = json_client.bucket(other_params.bucket_name) @@ -78,7 +117,6 @@ def upload_using_json(_, json_client, filename, other_params): # Don't use BytesIO because it'll report high memory usage for large files. # `RandomBytesIO` generates random bytes on the fly. in_mem_file = RandomBytesIO(upload_size) - # data = os.urandom(upload_size) blob.upload_from_file(in_mem_file) end_time = time.monotonic_ns() @@ -94,6 +132,11 @@ def upload_using_json(_, json_client, filename, other_params): def test_uploads_single_proc_single_coro( benchmark, storage_client, blobs_to_delete, monitor, workload_params ): + """ + Benchmarks uploads using a single process and a single coroutine. + It passes the workload to either `upload_chunks_using_grpc` (for zonal buckets) + or `upload_using_json` (for regional buckets) for benchmarking using `benchmark.pedantic`. + """ params, files_names = workload_params if params.bucket_type == "zonal": @@ -143,6 +186,17 @@ def target_wrapper(*args, **kwargs): ) def upload_files_using_grpc_multi_coro(loop, client, files, other_params): + """Uploads multiple files concurrently using gRPC with asyncio. + + Args: + loop: The asyncio event loop. + client: The async gRPC client. + files (list): A list of filenames to upload. + other_params: An object containing benchmark parameters. + + Returns: + float: The maximum latency observed among all coroutines. + """ async def main(): tasks = [] for f in files: @@ -155,6 +209,17 @@ async def main(): return max(results) def upload_files_using_json_multi_threaded(_, json_client, files, other_params): + """Uploads multiple files concurrently using the JSON API with a ThreadPoolExecutor. + + Args: + _ (any): Unused. + json_client: The standard Python Storage client. + files (list): A list of filenames to upload. + other_params: An object containing benchmark parameters. + + Returns: + float: The maximum latency observed among all concurrent uploads. + """ results = [] with ThreadPoolExecutor(max_workers=other_params.num_coros) as executor: futures = [] @@ -178,6 +243,13 @@ def upload_files_using_json_multi_threaded(_, json_client, files, other_params): def test_uploads_single_proc_multi_coro( benchmark, storage_client, blobs_to_delete, monitor, workload_params ): + """ + Benchmarks uploads using a single process and multiple coroutines. + + For zonal buckets, it uses `upload_files_using_grpc_multi_coro` to upload + multiple files concurrently with asyncio. For regional buckets, it uses + `upload_files_using_json_multi_threaded` with a ThreadPoolExecutor. + """ params, files_names = workload_params if params.bucket_type == "zonal": @@ -227,6 +299,19 @@ def target_wrapper(*args, **kwargs): ) def _upload_files_worker(files_to_upload, other_params, bucket_type): + """A worker function for multi-processing uploads. + + Initializes a client and calls the appropriate multi-coroutine upload function. + This function is intended to be called in a separate process. + + Args: + files_to_upload (list): List of filenames for this worker to upload. + other_params: An object containing benchmark parameters. + bucket_type (str): The type of bucket ('zonal' or 'regional'). + + Returns: + float: The maximum latency from the uploads performed by this worker. + """ if bucket_type == "zonal": loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) @@ -251,6 +336,17 @@ def _upload_files_worker(files_to_upload, other_params, bucket_type): ) def upload_files_mp_mc_wrapper(files_names, params): + """Wrapper for multi-process, multi-coroutine uploads. + + Distributes files among a pool of processes and calls the worker function. + + Args: + files_names (list): The full list of filenames to upload. + params: An object containing benchmark parameters (num_processes, num_coros). + + Returns: + float: The maximum latency observed across all processes. + """ num_processes = params.num_processes num_coros = params.num_coros @@ -282,6 +378,12 @@ def upload_files_mp_mc_wrapper(files_names, params): def test_uploads_multi_proc_multi_coro( benchmark, storage_client, blobs_to_delete, monitor, workload_params ): + """ + Benchmarks uploads using multiple processes and multiple coroutines. + + This test distributes files among a pool of processes using `upload_files_mp_mc_wrapper`. + The reported latency for each round is the maximum latency observed across all processes. + """ params, files_names = workload_params output_times = [] From 0066029b65e77e64d20c86f03d2f7a5f9e71e04d Mon Sep 17 00:00:00 2001 From: Chandra Date: Thu, 15 Jan 2026 19:20:43 +0000 Subject: [PATCH 5/6] refactor: simplify num_files calculation and clean up imports in write tests --- tests/perf/microbenchmarks/writes/config.py | 5 +---- tests/perf/microbenchmarks/writes/test_writes.py | 5 +++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/perf/microbenchmarks/writes/config.py b/tests/perf/microbenchmarks/writes/config.py index 7f5f6f1f3..b435d3bec 100644 --- a/tests/perf/microbenchmarks/writes/config.py +++ b/tests/perf/microbenchmarks/writes/config.py @@ -74,10 +74,7 @@ def get_write_params() -> Dict[str, List[WriteParameters]]: chunk_size_bytes = chunk_size_mib * 1024 * 1024 bucket_name = bucket_map[bucket_type] - if "single_file" in workload_name: - num_files = 1 - else: - num_files = num_processes * num_coros + num_files = num_processes * num_coros # Create a descriptive name for the parameter set name = f"{workload_name}_{bucket_type}_{num_processes}p_{num_coros}c" diff --git a/tests/perf/microbenchmarks/writes/test_writes.py b/tests/perf/microbenchmarks/writes/test_writes.py index ded797ad0..2b3c57abd 100644 --- a/tests/perf/microbenchmarks/writes/test_writes.py +++ b/tests/perf/microbenchmarks/writes/test_writes.py @@ -36,6 +36,7 @@ from tests.perf.microbenchmarks._utils import publish_benchmark_extra_info, RandomBytesIO from tests.perf.microbenchmarks.conftest import publish_resource_metrics import tests.perf.microbenchmarks.writes.config as config +from google.cloud import storage # Get write parameters all_params = config.get_write_params() @@ -75,7 +76,7 @@ async def upload_chunks_using_grpc_async(client, filename, other_params): await writer.close() assert uploaded_bytes == upload_size - + end_time = time.monotonic_ns() elapsed_time = end_time - start_time return elapsed_time / 1_000_000_000 @@ -329,7 +330,7 @@ def _upload_files_worker(files_to_upload, other_params, bucket_type): loop.close() return result else: # regional - from google.cloud import storage + json_client = storage.Client() return upload_files_using_json_multi_threaded( None, json_client, files_to_upload, other_params From 042c60bdf4be826610571077ae8d451c7e921ed0 Mon Sep 17 00:00:00 2001 From: Chandra Date: Thu, 15 Jan 2026 19:21:47 +0000 Subject: [PATCH 6/6] refactor: enhance docstring for get_write_params function to clarify its purpose and usage --- tests/perf/microbenchmarks/writes/config.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/perf/microbenchmarks/writes/config.py b/tests/perf/microbenchmarks/writes/config.py index b435d3bec..bc1548511 100644 --- a/tests/perf/microbenchmarks/writes/config.py +++ b/tests/perf/microbenchmarks/writes/config.py @@ -24,13 +24,17 @@ def get_write_params() -> Dict[str, List[WriteParameters]]: - """ - Docstring for get_write_params - 1. this function output a list of WriteParameters. - 2. to populate the values of WriteParameters, use default values from config_writes.yaml - 3. generate all possible params , ie - no. of params should be equal to bucket_type*file_size_mib, chunk_size * process * coros - you may use itertools.product + """Generates benchmark parameters from a YAML configuration file. + + This function reads the configuration from `config.yaml`, located in the + same directory, and generates all possible combinations of write parameters + based on the defined workloads. It uses `itertools.product` to create + a Cartesian product of parameters like bucket types, file sizes, etc. + + Returns: + Dict[str, List[WriteParameters]]: A dictionary where keys are workload + names and values are lists of `WriteParameters` instances for that + workload. """ params: Dict[str, List[WriteParameters]] = {} config_path = os.path.join(os.path.dirname(__file__), "config.yaml")