Skip to content

Commit f14e7aa

Browse files
Tighten bracketed-host parsing to RFC 3986 / RFC 6874 strict semantics
RFC 3986 §3.2.2 reserves bracket syntax for IPv6 literals (``IP-literal = IPv6address / IPvFuture``). The parser previously extracted the bracket contents and forwarded them to ``_canonicalize_host`` without enforcement, so ``[127.0.0.1]:9001``, ``[localhost]:9001``, and ``[example.com]:9001`` all silently succeeded. The raw ``address`` string is stored verbatim on ``DqliteConnection._address`` and surfaces in repr / logs / exception messages, so log audits and string-based diagnostics were fragmented across two surface forms for the same logical host. Validate the bracket contents with ``ipaddress.ip_address`` and require an ``IPv6Address`` (not ``IPv4Address``); reject empty and whitespace-only contents.

RFC 6874 specifies that IPv6 zone identifiers in URIs are written with the ``%`` zone separator percent-encoded (``%25`` escapes the literal ``%``). The application-form ``[fe80::1%eth0]`` and the URI-form ``[fe80::1%25eth0]`` refer to the same logical zone but were treated as distinct host strings. ``urllib.parse.unquote`` decodes the zone suffix so both surface variants canonicalise to the same tuple — allowlist policies holding one form match either.
1 parent 52c4fa5 commit f14e7aa

2 files changed

Lines changed: 97 additions & 2 deletions

File tree

src/dqliteclient/connection.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -628,14 +628,56 @@ def _parse_address(address: str) -> tuple[str, int]:
628628
non-ASCII, empty) raise ``ValueError``.
629629
"""
630630
if address.startswith("["):
631-
# Bracketed IPv6: [host]:port
631+
# Bracketed IPv6: [host]:port. RFC 3986 §3.2.2 reserves the
632+
# bracket form for ``IP-literal = IPv6address / IPvFuture``;
633+
# bracketed IPv4 / hostname / empty contents are malformed
634+
# surface variants that must be rejected so log audits and
635+
# allowlists do not split across two distinct strings for
636+
# the same logical host.
632637
if "]:" not in address:
633638
raise ValueError(
634639
f"Invalid IPv6 address format: expected '[host]:port', got {address!r}"
635640
)
636641
bracket_end = address.index("]")
637642
host = address[1:bracket_end]
638-
port_str = address[bracket_end + 2 :] # Skip ']:
643+
port_str = address[bracket_end + 2 :] # Skip ']:'
644+
645+
# RFC 6874: zone identifiers may be percent-encoded as ``%25``
646+
# in URIs. Percent-decode the zone-ID portion (everything
647+
# after the first ``%``) so the URI form
648+
# ``[fe80::1%25eth0]`` and the application form
649+
# ``[fe80::1%eth0]`` canonicalise identically. Use
650+
# ``urllib.parse.unquote`` so any RFC-3986 percent-encoded
651+
# octet survives correctly (in practice only ``%25`` is
652+
# expected; note that a raw zone starting with two hex digits,
652+
# e.g. ``%11``, is decoded too, so unquote is not always a no-op).
654+
from urllib.parse import unquote
655+
656+
zone_sep = host.find("%")
657+
if zone_sep != -1:
658+
# Decode the entire ``%...`` suffix so ``%25`` collapses
659+
# to a literal ``%`` (matching the application-form zone
660+
# separator). ``unquote`` leaves a literal ``%`` followed
661+
# by non-hex characters intact, so the no-encoding path
662+
# round-trips byte-for-byte.
663+
host = host[:zone_sep] + unquote(host[zone_sep:])
664+
665+
# Strict bracket discipline: validate the contents are an
666+
# IPv6 literal. ``ipaddress.ip_address`` does not accept
667+
# the ``%zone`` suffix, so strip it before the check.
668+
ipv6_part = host.split("%", 1)[0]
669+
try:
670+
parsed = ipaddress.ip_address(ipv6_part)
671+
except ValueError as e:
672+
raise ValueError(
673+
f"Bracket syntax in {address!r} is reserved for IPv6 "
674+
f"literals; {host!r} is not an IPv6 address"
675+
) from e
676+
if not isinstance(parsed, ipaddress.IPv6Address):
677+
raise ValueError(
678+
f"Bracket syntax in {address!r} is reserved for IPv6 "
679+
f"literals; got {type(parsed).__name__}"
680+
)
639681
else:
640682
if ":" not in address:
641683
raise ValueError(f"Invalid address format: expected 'host:port', got {address!r}")
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""Pin: bracket syntax in ``_parse_address`` is reserved for IPv6
2+
literals (RFC 3986 §3.2.2). Bracketed IPv4 / hostname / empty
3+
contents must be rejected.
4+
5+
Pin: IPv6 zone identifiers in bracketed form percent-decode per
6+
RFC 6874 (``%25`` escapes the literal ``%``). Both surface variants of
7+
the same logical zone canonicalise to the same tuple.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
import pytest
13+
14+
from dqliteclient.connection import _parse_address
15+
16+
17+
class TestBracketedNonIpv6Rejected:
18+
@pytest.mark.parametrize(
19+
"addr",
20+
[
21+
"[127.0.0.1]:9001",
22+
"[localhost]:9001",
23+
"[example.com]:9001",
24+
"[]:9001",
25+
"[ ]:9001",
26+
],
27+
)
28+
def test_bracketed_non_ipv6_raises(self, addr: str) -> None:
29+
with pytest.raises(ValueError):
30+
_parse_address(addr)
31+
32+
def test_real_ipv6_still_parses(self) -> None:
33+
assert _parse_address("[::1]:9001") == ("::1", 9001)
34+
assert _parse_address("[fe80::1]:9001") == ("fe80::1", 9001)
35+
assert _parse_address("[2001:db8::1]:9001") == ("2001:db8::1", 9001)
36+
37+
38+
class TestIpv6ZoneIdPercentEncoding:
39+
def test_zone_id_decoded_canonicalises_two_surface_forms(self) -> None:
40+
"""RFC 6874: the URI-form ``%25eth0`` and the application-form
41+
``%eth0`` must canonicalise to the same tuple so allowlist
42+
policies match either surface variant."""
43+
a = _parse_address("[fe80::1%eth0]:9001")
44+
b = _parse_address("[fe80::1%25eth0]:9001")
45+
assert a == b
46+
assert "%eth0" in a[0]
47+
# Post-decode form must NOT contain the URI-encoded sequence.
48+
assert "%25" not in a[0]
49+
50+
def test_unencoded_zone_id_still_works(self) -> None:
51+
host, port = _parse_address("[fe80::1%eth0]:9001")
52+
assert host == "fe80::1%eth0"
53+
assert port == 9001

0 commit comments

Comments
 (0)