From 7d3178d167a2f42d42e600f1bbab5c3bbc3a3b2d Mon Sep 17 00:00:00 2001 From: Li Yonghui Date: Wed, 29 Apr 2026 22:03:37 +0800 Subject: [PATCH] fix(storage): correct off-by-one in BlobReader chunked range fetch --- .../google/cloud/storage/fileio.py | 4 +- .../tests/unit/test_fileio.py | 69 ++++++++++++++----- 2 files changed, 54 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-storage/google/cloud/storage/fileio.py b/packages/google-cloud-storage/google/cloud/storage/fileio.py index d721b6e3de4f..1f8886a9d5be 100644 --- a/packages/google-cloud-storage/google/cloud/storage/fileio.py +++ b/packages/google-cloud-storage/google/cloud/storage/fileio.py @@ -134,7 +134,9 @@ def read(self, size=-1): fetch_start = self._pos if size > 0: # Fetch the larger of self._chunk_size or the remaining_size. - fetch_end = fetch_start + max(remaining_size, self._chunk_size) + # end is inclusive in download_as_bytes, + # so subtract 1 to fetch exactly chunk_size or remaining_size bytes. + fetch_end = fetch_start + max(remaining_size, self._chunk_size) - 1 else: fetch_end = None diff --git a/packages/google-cloud-storage/tests/unit/test_fileio.py b/packages/google-cloud-storage/tests/unit/test_fileio.py index 341d700152f1..10e9862399f2 100644 --- a/packages/google-cloud-storage/tests/unit/test_fileio.py +++ b/packages/google-cloud-storage/tests/unit/test_fileio.py @@ -71,7 +71,9 @@ def test_read(self): blob = mock.Mock() def read_from_fake_data(start=0, end=None, **_): - return TEST_BINARY_DATA[start:end] + # end is inclusive in download_as_bytes. + stop = end + 1 if end is not None else None + return TEST_BINARY_DATA[start:stop] blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data) download_kwargs = {"if_metageneration_match": 1} @@ -80,7 +82,7 @@ def read_from_fake_data(start=0, end=None, **_): # Read and trigger the first download of chunk_size. self.assertEqual(reader.read(1), TEST_BINARY_DATA[0:1]) blob.download_as_bytes.assert_called_once_with( - start=0, end=8, checksum=None, retry=DEFAULT_RETRY, **download_kwargs + start=0, end=7, checksum=None, retry=DEFAULT_RETRY, **download_kwargs ) # Read from buffered data only. @@ -92,7 +94,7 @@ def read_from_fake_data(start=0, end=None, **_): self.assertEqual(reader._pos, 12) self.assertEqual(blob.download_as_bytes.call_count, 2) blob.download_as_bytes.assert_called_with( - start=8, end=16, checksum=None, retry=DEFAULT_RETRY, **download_kwargs + start=8, end=15, checksum=None, retry=DEFAULT_RETRY, **download_kwargs ) # Read a larger amount, requiring a download larger than chunk_size. @@ -100,7 +102,7 @@ def read_from_fake_data(start=0, end=None, **_): self.assertEqual(reader._pos, 28) self.assertEqual(blob.download_as_bytes.call_count, 3) blob.download_as_bytes.assert_called_with( - start=16, end=28, checksum=None, retry=DEFAULT_RETRY, **download_kwargs + start=16, end=27, checksum=None, retry=DEFAULT_RETRY, **download_kwargs ) # Read all remaining data. @@ -112,11 +114,26 @@ def read_from_fake_data(start=0, end=None, **_): reader.close() + def test_read_uses_inclusive_end_range(self): + # end is inclusive in download_as_bytes, + # so a chunk_size=N read must request end=N-1. + blob = mock.Mock() + blob.download_as_bytes = mock.Mock(return_value=b"") + reader = self._make_blob_reader(blob, chunk_size=64) + reader.read(1) + blob.download_as_bytes.assert_called_once() + _, kwargs = blob.download_as_bytes.call_args + self.assertEqual(kwargs["start"], 0) + self.assertEqual(kwargs["end"], 63) + reader.close() + def test_read_with_raw_download(self): blob = mock.Mock() def read_from_fake_data(start=0, end=None, **_): - return TEST_BINARY_DATA[start:end] + # end is inclusive in download_as_bytes. + stop = end + 1 if end is not None else None + return TEST_BINARY_DATA[start:stop] blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data) download_kwargs = {"raw_download": True} @@ -125,7 +142,7 @@ def read_from_fake_data(start=0, end=None, **_): # Read and trigger the first download of chunk_size. self.assertEqual(reader.read(1), TEST_BINARY_DATA[0:1]) blob.download_as_bytes.assert_called_once_with( - start=0, end=8, checksum=None, retry=DEFAULT_RETRY, raw_download=True + start=0, end=7, checksum=None, retry=DEFAULT_RETRY, raw_download=True ) reader.close() @@ -134,7 +151,9 @@ def test_retry_passed_through(self): blob = mock.Mock() def read_from_fake_data(start=0, end=None, **_): - return TEST_BINARY_DATA[start:end] + # end is inclusive in download_as_bytes. + stop = end + 1 if end is not None else None + return TEST_BINARY_DATA[start:stop] blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data) download_kwargs = {"if_metageneration_match": 1} @@ -145,7 +164,7 @@ def read_from_fake_data(start=0, end=None, **_): # Read and trigger the first download of chunk_size. self.assertEqual(reader.read(1), TEST_BINARY_DATA[0:1]) blob.download_as_bytes.assert_called_once_with( - start=0, end=8, checksum=None, retry=None, **download_kwargs + start=0, end=7, checksum=None, retry=None, **download_kwargs ) reader.close() @@ -163,7 +182,9 @@ def test_readline(self): blob = mock.Mock() def read_from_fake_data(start=0, end=None, **_): - return TEST_BINARY_DATA[start:end] + # end is inclusive in download_as_bytes. + stop = end + 1 if end is not None else None + return TEST_BINARY_DATA[start:stop] blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data) reader = self._make_blob_reader(blob, chunk_size=10) @@ -171,14 +192,14 @@ def read_from_fake_data(start=0, end=None, **_): # Read a line. With chunk_size=10, expect three chunks downloaded. self.assertEqual(reader.readline(), TEST_BINARY_DATA[:27]) blob.download_as_bytes.assert_called_with( - start=20, end=30, checksum=None, retry=DEFAULT_RETRY + start=20, end=29, checksum=None, retry=DEFAULT_RETRY ) self.assertEqual(blob.download_as_bytes.call_count, 3) # Read another line. self.assertEqual(reader.readline(), TEST_BINARY_DATA[27:]) blob.download_as_bytes.assert_called_with( - start=50, end=60, checksum=None, retry=DEFAULT_RETRY + start=50, end=59, checksum=None, retry=DEFAULT_RETRY ) self.assertEqual(blob.download_as_bytes.call_count, 6) @@ -189,7 +210,7 @@ def read_from_fake_data(start=0, end=None, **_): self.assertEqual(b"".join(reader.readlines()), TEST_BINARY_DATA) blob.download_as_bytes.assert_called_with( start=len(TEST_BINARY_DATA), - end=len(TEST_BINARY_DATA) + 10, + end=len(TEST_BINARY_DATA) + 9, checksum=None, retry=DEFAULT_RETRY, ) @@ -201,7 +222,9 @@ def test_seek(self): blob = mock.Mock() def read_from_fake_data(start=0, end=None, **_): - return TEST_BINARY_DATA[start:end] + # end is inclusive in download_as_bytes. + stop = end + 1 if end is not None else None + return TEST_BINARY_DATA[start:stop] blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data) blob.size = None @@ -254,7 +277,9 @@ def test_advanced_seek(self): blob = mock.Mock() def read_from_fake_data(start=0, end=None, **_): - return TEST_BINARY_DATA[start:end] * 1024 + # end is inclusive in download_as_bytes. + stop = end + 1 if end is not None else None + return TEST_BINARY_DATA[start:stop] * 1024 blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data) blob.size = None @@ -758,7 +783,9 @@ def test_read(self): blob = mock.Mock() def read_from_fake_data(start=0, end=None, **_): - return TEST_TEXT_DATA.encode("utf-8")[start:end] + # end is inclusive in download_as_bytes. + stop = end + 1 if end is not None else None + return TEST_TEXT_DATA.encode("utf-8")[start:stop] blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data) blob.chunk_size = None @@ -789,7 +816,9 @@ def test_multibyte_read(self): blob = mock.Mock() def read_from_fake_data(start=0, end=None, **_): - return TEST_MULTIBYTE_TEXT_DATA.encode("utf-8")[start:end] + # end is inclusive in download_as_bytes. + stop = end + 1 if end is not None else None + return TEST_MULTIBYTE_TEXT_DATA.encode("utf-8")[start:stop] blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data) blob.chunk_size = None @@ -820,7 +849,9 @@ def test_seek(self): blob = mock.Mock() def read_from_fake_data(start=0, end=None, **_): - return TEST_TEXT_DATA.encode("utf-8")[start:end] + # end is inclusive in download_as_bytes. + stop = end + 1 if end is not None else None + return TEST_TEXT_DATA.encode("utf-8")[start:stop] blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data) blob.size = None @@ -853,7 +884,9 @@ def test_multibyte_seek(self): blob = mock.Mock() def read_from_fake_data(start=0, end=None, **_): - return TEST_MULTIBYTE_TEXT_DATA.encode("utf-8")[start:end] + # end is inclusive in download_as_bytes. + stop = end + 1 if end is not None else None + return TEST_MULTIBYTE_TEXT_DATA.encode("utf-8")[start:stop] blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data) blob.size = None