Skip to content

Commit c94bcd2

Browse files
fix: raise EncodeError for non-str input to encode_text
encode_text relied on `"\x00" in value` to detect embedded nulls, which leaks raw TypeError for non-iterable inputs (None, int, float) and leaks misleading TypeErrors for bytes/bytearray/memoryview. Lone surrogates additionally leak UnicodeEncodeError from .encode("utf-8").

Add an isinstance(value, str) guard at the top of encode_text and wrap the UTF-8 encode step so both failure paths surface as EncodeError, matching the library's documented exception contract.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 68ef80f commit c94bcd2

2 files changed

Lines changed: 27 additions & 1 deletion

File tree

src/dqlitewire/types.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,17 @@ def pad_to_word(size: int) -> int:
8888

8989
def encode_text(value: str) -> bytes:
9090
"""Encode text as null-terminated UTF-8, padded to 8-byte boundary."""
91+
if not isinstance(value, str):
92+
raise EncodeError(f"encode_text expected str, got {type(value).__name__}")
9193
if "\x00" in value:
9294
raise EncodeError(
9395
f"Text value contains embedded null byte at position {value.index(chr(0))}; "
9496
"null-terminated encoding would lose data"
9597
)
96-
encoded = value.encode("utf-8") + b"\x00"
98+
try:
99+
encoded = value.encode("utf-8") + b"\x00"
100+
except UnicodeEncodeError as e:
101+
raise EncodeError(f"Text contains invalid UTF-8: {e}") from e
97102
padding = pad_to_word(len(encoded))
98103
return encoded + (b"\x00" * padding)
99104

tests/test_types.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,27 @@ def test_embedded_null_raises_encode_error(self) -> None:
234234
with pytest.raises(EncodeError, match="embedded null byte"):
235235
encode_text("\x00")
236236

237+
@pytest.mark.parametrize(
238+
"bad",
239+
[None, 42, b"x", 3.14, ["x"], bytearray(b"x"), memoryview(b"x")],
240+
)
241+
def test_non_str_raises_encode_error(self, bad: object) -> None:
242+
"""234: Non-string inputs must raise EncodeError, not raw TypeError.
243+
244+
Callers following the library's documented exception contract
245+
(``except EncodeError``) would otherwise miss these failures.
246+
"""
247+
with pytest.raises(EncodeError, match="expected str"):
248+
encode_text(bad) # type: ignore[arg-type]
249+
250+
def test_lone_surrogate_raises_encode_error(self) -> None:
251+
"""234: Lone UTF-16 surrogates are legal Python str but not UTF-8-encodable.
252+
253+
Must raise EncodeError (not the stdlib's UnicodeEncodeError).
254+
"""
255+
with pytest.raises(EncodeError, match="invalid UTF-8"):
256+
encode_text("\ud800")
257+
237258
def test_decode_not_terminated_fails(self) -> None:
238259
with pytest.raises(DecodeError):
239260
decode_text(b"hello")

0 commit comments

Comments
 (0)