Skip to content

Commit 3d382a0

Browse files
authored
Merge pull request #1146 from khushthecoder/GH1056-local-file-urls-escaped-paths
Fix #1056: Decode URL-escaped paths for local filesystems
2 parents 9ef3865 + b81fe6e commit 3d382a0

2 files changed

Lines changed: 54 additions & 0 deletions

File tree

malariagen_data/util.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,25 @@ def _init_filesystem(url, **kwargs):
494494
# Process the URL using fsspec.
495495
fs, path = url_to_fs(url, **storage_options)
496496

497+
# Decode URL-encoded paths for local filesystems.
498+
protocol = getattr(fs, "protocol", None)
499+
if isinstance(protocol, str):
500+
protocols = {protocol}
501+
elif isinstance(protocol, (tuple, list)):
502+
protocols = set(protocol)
503+
else:
504+
protocols = set()
505+
506+
is_local = (
507+
bool(protocols.intersection({"file", "local"}))
508+
or fs.__class__.__name__ == "LocalFileSystem"
509+
)
510+
511+
if is_local:
512+
from urllib.parse import unquote
513+
514+
path = unquote(path)
515+
497516
# Path compatibility, fsspec/gcsfs behaviour varies between versions.
498517
while path.endswith("/"):
499518
path = path[:-1]

tests/test_util_filesystem.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import os
2+
from malariagen_data.util import _init_filesystem
3+
4+
5+
def test_init_filesystem_decodes_file_uri_escaped_path(tmp_path):
    """Check that a percent-escaped ``file://`` URI yields a usable local path.

    A directory whose name contains a space and an apostrophe is created
    under ``tmp_path`` and converted to a URI. The path returned by
    ``_init_filesystem`` must no longer contain the ``%20`` / ``%27``
    escapes and must be usable to open a file inside that directory.
    """
    awkward_dir = tmp_path / "dir with space's"
    awkward_dir.mkdir()
    (awkward_dir / "v3-config.json").write_text('{"foo": "bar"}')

    uri = awkward_dir.as_uri()
    fs, path = _init_filesystem(uri)

    # The URI itself is escaped; the resolved path must not be.
    assert "%20" in uri
    for escape in ("%20", "%27"):
        assert escape not in path

    # Joining the decoded path with the file name should open the real file.
    target = os.path.join(path, "v3-config.json")
    with fs.open(target, "r") as handle:
        assert handle.read() == '{"foo": "bar"}'
22+
23+
24+
def test_init_filesystem_plain_local_path_unchanged(tmp_path):
    """Check that a plain (non-URI) local path still resolves to a readable location.

    The directory name contains a space and an apostrophe; it is passed to
    ``_init_filesystem`` as an ordinary string path, and a file inside it
    must then be readable through the returned filesystem.
    """
    awkward_dir = tmp_path / "dir with space's"
    awkward_dir.mkdir()
    (awkward_dir / "v3-config.json").write_text('{"foo": "bar"}')

    # Plain string path: no scheme, no percent-encoding.
    fs, path = _init_filesystem(str(awkward_dir))

    target = os.path.join(path, "v3-config.json")
    with fs.open(target, "r") as handle:
        assert handle.read() == '{"foo": "bar"}'

0 commit comments

Comments
 (0)