Skip to content

Commit 40b1d75

Browse files
committed
feat: Add 'whole' pattern and update config to bytes in reads_regional benchmark
1 parent 2a68c53 commit 40b1d75

3 files changed

Lines changed: 37 additions & 10 deletions

File tree

packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads_regional/config.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def _get_params() -> Dict[str, List[TimeBasedReadParameters]]:

     common_params = config["common"]
     read_types = common_params["read_types"]
-    file_sizes_mib = common_params["file_sizes_mib"]
+    file_sizes = common_params["file_sizes"]
     chunk_sizes_kib = common_params["chunk_sizes_kib"]
     num_ranges = common_params["num_ranges"]
     rounds = common_params["rounds"]
@@ -56,7 +56,7 @@ def _get_params() -> Dict[str, List[TimeBasedReadParameters]]:
        # Create a product of all parameter combinations
        product = itertools.product(
            read_types,
-           file_sizes_mib,
+           file_sizes,
            chunk_sizes_kib,
            num_ranges,
            processes,
@@ -65,19 +65,19 @@ def _get_params() -> Dict[str, List[TimeBasedReadParameters]]:

        for (
            read_type,
-           file_size_mib,
+           file_size,
            chunk_size_kib,
            num_ranges_val,
            num_processes,
            num_coros,
        ) in product:
-           file_size_bytes = file_size_mib * 1024 * 1024
+           file_size_bytes = file_size
            chunk_size_bytes = chunk_size_kib * 1024

            num_files = num_processes

            # Create a descriptive name for the parameter set
-           name = f"{pattern}_{read_type}_{num_processes}p_{num_coros}c_{file_size_mib}MiB_{chunk_size_kib}KiB_{num_ranges_val}ranges"
+           name = f"{pattern}_{read_type}_{num_processes}p_{num_coros}c_{file_size / (1024 * 1024)}MiB_{chunk_size_kib}KiB_{num_ranges_val}ranges"

            params[workload_name].append(
                TimeBasedReadParameters(

packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads_regional/config.yaml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ common:
    - "async_json"
    - "async_grpc_dp"
    - "async_grpc_cp"
-  file_sizes_mib:
-    - 10240 # 10GiB
+  file_sizes:
+    - 10737418240 # 10GiB in bytes
   chunk_sizes_kib: [64]
   num_ranges: [1]
   rounds: 1
@@ -23,5 +23,10 @@ workload:
     coros: [1, 16]
     processes: [1]

+  - name: "read_whole_multi_process"
+    pattern: "whole"
+    coros: [1]
+    processes: [1]
+
 defaults:
   DEFAULT_STANDARD_BUCKET: "chandrasiri-benchmarks-rb"

packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads_regional/test_reads.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,15 @@ def _cleanup_session():

 async def _download_time_based_json_async(session, filename, params):
     """Performs time-based downloads using the JSON API via aiohttp."""
+    if params.pattern == "whole":
+        # NOTE(review): the object name placeholder was lost in extraction;
+        # reconstructed as {filename} from this function's parameter — confirm.
+        url = f"https://storage.googleapis.com/storage/v1/b/{params.bucket_name}/o/{filename}?alt=media"
+        headers = {
+            "Authorization": f"Bearer {token}",
+        }
+        async with session.get(url, headers=headers) as response:
+            data = await response.read()
+        return len(data)
+
     total_bytes_downloaded = 0
     offset = 0
     is_warming_up = True
@@ -137,6 +146,12 @@ async def _download_time_based_async(client, filename, params):
     mrd = AsyncMultiRangeDownloader(client, params.bucket_name, filename)
     await mrd.open()

+    if params.pattern == "whole":
+        ranges = [(0, params.file_size_bytes, BytesIO())]
+        await mrd.download_ranges(ranges)
+        await mrd.close()
+        return ranges[0][2].getbuffer().nbytes
+
     async def _worker_coro():
         total_bytes_downloaded = 0
         offset = 0
@@ -224,21 +239,28 @@ def target_wrapper(*args, **kwargs):
        download_bytes_list.append(download_files_mp_mc_wrapper(pool, *args, **kwargs))
        return

+   duration_pedantic = 0
    try:
        with monitor() as m:
+           start_pedantic = time.monotonic()
            benchmark.pedantic(
                target=target_wrapper,
                iterations=1,
                rounds=params.rounds,
                args=(files_names, params, params.read_type),
            )
+           end_pedantic = time.monotonic()
+           duration_pedantic = end_pedantic - start_pedantic
    finally:
        pool.close()
        pool.join()
    total_bytes_downloaded = sum(download_bytes_list)
-   throughput_mib_s = (
-       total_bytes_downloaded / params.duration / params.rounds
-   ) / (1024 * 1024)
+   if params.pattern == "whole":
+       throughput_mib_s = (total_bytes_downloaded / duration_pedantic) / (1024 * 1024)
+   else:
+       throughput_mib_s = (
+           total_bytes_downloaded / params.duration / params.rounds
+       ) / (1024 * 1024)
    benchmark.extra_info["avg_throughput_mib_s"] = f"{throughput_mib_s:.2f}"
    print(
        f"Avg Throughput of {params.rounds} round(s): {throughput_mib_s:.2f} MiB/s"

0 commit comments

Comments
 (0)