Skip to content

Commit 7d3178d

Browse files
committed
fix(storage): correct off-by-one in BlobReader chunked range fetch
1 parent 53f72fb commit 7d3178d

2 files changed

Lines changed: 54 additions & 19 deletions

File tree

packages/google-cloud-storage/google/cloud/storage/fileio.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,9 @@ def read(self, size=-1):
134134
fetch_start = self._pos
135135
if size > 0:
136136
# Fetch the larger of self._chunk_size or the remaining_size.
137-
fetch_end = fetch_start + max(remaining_size, self._chunk_size)
137+
# end is inclusive in download_as_bytes,
138+
# so subtract 1 to fetch exactly chunk_size or remaining_size bytes.
139+
fetch_end = fetch_start + max(remaining_size, self._chunk_size) - 1
138140
else:
139141
fetch_end = None
140142

packages/google-cloud-storage/tests/unit/test_fileio.py

Lines changed: 51 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@ def test_read(self):
7171
blob = mock.Mock()
7272

7373
def read_from_fake_data(start=0, end=None, **_):
74-
return TEST_BINARY_DATA[start:end]
74+
# end is inclusive in download_as_bytes.
75+
stop = end + 1 if end is not None else None
76+
return TEST_BINARY_DATA[start:stop]
7577

7678
blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data)
7779
download_kwargs = {"if_metageneration_match": 1}
@@ -80,7 +82,7 @@ def read_from_fake_data(start=0, end=None, **_):
8082
# Read and trigger the first download of chunk_size.
8183
self.assertEqual(reader.read(1), TEST_BINARY_DATA[0:1])
8284
blob.download_as_bytes.assert_called_once_with(
83-
start=0, end=8, checksum=None, retry=DEFAULT_RETRY, **download_kwargs
85+
start=0, end=7, checksum=None, retry=DEFAULT_RETRY, **download_kwargs
8486
)
8587

8688
# Read from buffered data only.
@@ -92,15 +94,15 @@ def read_from_fake_data(start=0, end=None, **_):
9294
self.assertEqual(reader._pos, 12)
9395
self.assertEqual(blob.download_as_bytes.call_count, 2)
9496
blob.download_as_bytes.assert_called_with(
95-
start=8, end=16, checksum=None, retry=DEFAULT_RETRY, **download_kwargs
97+
start=8, end=15, checksum=None, retry=DEFAULT_RETRY, **download_kwargs
9698
)
9799

98100
# Read a larger amount, requiring a download larger than chunk_size.
99101
self.assertEqual(reader.read(16), TEST_BINARY_DATA[12:28])
100102
self.assertEqual(reader._pos, 28)
101103
self.assertEqual(blob.download_as_bytes.call_count, 3)
102104
blob.download_as_bytes.assert_called_with(
103-
start=16, end=28, checksum=None, retry=DEFAULT_RETRY, **download_kwargs
105+
start=16, end=27, checksum=None, retry=DEFAULT_RETRY, **download_kwargs
104106
)
105107

106108
# Read all remaining data.
@@ -112,11 +114,26 @@ def read_from_fake_data(start=0, end=None, **_):
112114

113115
reader.close()
114116

117+
def test_read_uses_inclusive_end_range(self):
118+
# end is inclusive in download_as_bytes,
119+
# so a chunk_size=N read must request end=N-1.
120+
blob = mock.Mock()
121+
blob.download_as_bytes = mock.Mock(return_value=b"")
122+
reader = self._make_blob_reader(blob, chunk_size=64)
123+
reader.read(1)
124+
blob.download_as_bytes.assert_called_once()
125+
_, kwargs = blob.download_as_bytes.call_args
126+
self.assertEqual(kwargs["start"], 0)
127+
self.assertEqual(kwargs["end"], 63)
128+
reader.close()
129+
115130
def test_read_with_raw_download(self):
116131
blob = mock.Mock()
117132

118133
def read_from_fake_data(start=0, end=None, **_):
119-
return TEST_BINARY_DATA[start:end]
134+
# end is inclusive in download_as_bytes.
135+
stop = end + 1 if end is not None else None
136+
return TEST_BINARY_DATA[start:stop]
120137

121138
blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data)
122139
download_kwargs = {"raw_download": True}
@@ -125,7 +142,7 @@ def read_from_fake_data(start=0, end=None, **_):
125142
# Read and trigger the first download of chunk_size.
126143
self.assertEqual(reader.read(1), TEST_BINARY_DATA[0:1])
127144
blob.download_as_bytes.assert_called_once_with(
128-
start=0, end=8, checksum=None, retry=DEFAULT_RETRY, raw_download=True
145+
start=0, end=7, checksum=None, retry=DEFAULT_RETRY, raw_download=True
129146
)
130147

131148
reader.close()
@@ -134,7 +151,9 @@ def test_retry_passed_through(self):
134151
blob = mock.Mock()
135152

136153
def read_from_fake_data(start=0, end=None, **_):
137-
return TEST_BINARY_DATA[start:end]
154+
# end is inclusive in download_as_bytes.
155+
stop = end + 1 if end is not None else None
156+
return TEST_BINARY_DATA[start:stop]
138157

139158
blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data)
140159
download_kwargs = {"if_metageneration_match": 1}
@@ -145,7 +164,7 @@ def read_from_fake_data(start=0, end=None, **_):
145164
# Read and trigger the first download of chunk_size.
146165
self.assertEqual(reader.read(1), TEST_BINARY_DATA[0:1])
147166
blob.download_as_bytes.assert_called_once_with(
148-
start=0, end=8, checksum=None, retry=None, **download_kwargs
167+
start=0, end=7, checksum=None, retry=None, **download_kwargs
149168
)
150169

151170
reader.close()
@@ -163,22 +182,24 @@ def test_readline(self):
163182
blob = mock.Mock()
164183

165184
def read_from_fake_data(start=0, end=None, **_):
166-
return TEST_BINARY_DATA[start:end]
185+
# end is inclusive in download_as_bytes.
186+
stop = end + 1 if end is not None else None
187+
return TEST_BINARY_DATA[start:stop]
167188

168189
blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data)
169190
reader = self._make_blob_reader(blob, chunk_size=10)
170191

171192
# Read a line. With chunk_size=10, expect three chunks downloaded.
172193
self.assertEqual(reader.readline(), TEST_BINARY_DATA[:27])
173194
blob.download_as_bytes.assert_called_with(
174-
start=20, end=30, checksum=None, retry=DEFAULT_RETRY
195+
start=20, end=29, checksum=None, retry=DEFAULT_RETRY
175196
)
176197
self.assertEqual(blob.download_as_bytes.call_count, 3)
177198

178199
# Read another line.
179200
self.assertEqual(reader.readline(), TEST_BINARY_DATA[27:])
180201
blob.download_as_bytes.assert_called_with(
181-
start=50, end=60, checksum=None, retry=DEFAULT_RETRY
202+
start=50, end=59, checksum=None, retry=DEFAULT_RETRY
182203
)
183204
self.assertEqual(blob.download_as_bytes.call_count, 6)
184205

@@ -189,7 +210,7 @@ def read_from_fake_data(start=0, end=None, **_):
189210
self.assertEqual(b"".join(reader.readlines()), TEST_BINARY_DATA)
190211
blob.download_as_bytes.assert_called_with(
191212
start=len(TEST_BINARY_DATA),
192-
end=len(TEST_BINARY_DATA) + 10,
213+
end=len(TEST_BINARY_DATA) + 9,
193214
checksum=None,
194215
retry=DEFAULT_RETRY,
195216
)
@@ -201,7 +222,9 @@ def test_seek(self):
201222
blob = mock.Mock()
202223

203224
def read_from_fake_data(start=0, end=None, **_):
204-
return TEST_BINARY_DATA[start:end]
225+
# end is inclusive in download_as_bytes.
226+
stop = end + 1 if end is not None else None
227+
return TEST_BINARY_DATA[start:stop]
205228

206229
blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data)
207230
blob.size = None
@@ -254,7 +277,9 @@ def test_advanced_seek(self):
254277
blob = mock.Mock()
255278

256279
def read_from_fake_data(start=0, end=None, **_):
257-
return TEST_BINARY_DATA[start:end] * 1024
280+
# end is inclusive in download_as_bytes.
281+
stop = end + 1 if end is not None else None
282+
return TEST_BINARY_DATA[start:stop] * 1024
258283

259284
blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data)
260285
blob.size = None
@@ -758,7 +783,9 @@ def test_read(self):
758783
blob = mock.Mock()
759784

760785
def read_from_fake_data(start=0, end=None, **_):
761-
return TEST_TEXT_DATA.encode("utf-8")[start:end]
786+
# end is inclusive in download_as_bytes.
787+
stop = end + 1 if end is not None else None
788+
return TEST_TEXT_DATA.encode("utf-8")[start:stop]
762789

763790
blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data)
764791
blob.chunk_size = None
@@ -789,7 +816,9 @@ def test_multibyte_read(self):
789816
blob = mock.Mock()
790817

791818
def read_from_fake_data(start=0, end=None, **_):
792-
return TEST_MULTIBYTE_TEXT_DATA.encode("utf-8")[start:end]
819+
# end is inclusive in download_as_bytes.
820+
stop = end + 1 if end is not None else None
821+
return TEST_MULTIBYTE_TEXT_DATA.encode("utf-8")[start:stop]
793822

794823
blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data)
795824
blob.chunk_size = None
@@ -820,7 +849,9 @@ def test_seek(self):
820849
blob = mock.Mock()
821850

822851
def read_from_fake_data(start=0, end=None, **_):
823-
return TEST_TEXT_DATA.encode("utf-8")[start:end]
852+
# end is inclusive in download_as_bytes.
853+
stop = end + 1 if end is not None else None
854+
return TEST_TEXT_DATA.encode("utf-8")[start:stop]
824855

825856
blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data)
826857
blob.size = None
@@ -853,7 +884,9 @@ def test_multibyte_seek(self):
853884
blob = mock.Mock()
854885

855886
def read_from_fake_data(start=0, end=None, **_):
856-
return TEST_MULTIBYTE_TEXT_DATA.encode("utf-8")[start:end]
887+
# end is inclusive in download_as_bytes.
888+
stop = end + 1 if end is not None else None
889+
return TEST_MULTIBYTE_TEXT_DATA.encode("utf-8")[start:stop]
857890

858891
blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data)
859892
blob.size = None

0 commit comments

Comments
 (0)