Skip to content

Commit 6fafe9b

Browse files
fix: support pandas @variable syntax in query validator and fix CI failures
- Preprocess @variable references (e.g., `taxon in @taxon_list`) by replacing them with safe placeholder identifiers before AST parsing, since the @ prefix is pandas-specific syntax that is not a valid Python expression and so cannot be parsed by `ast`.
- Fix test_non_string_input to accept both UnsafeQueryError and TypeError (typeguard may intercept the type check first).
- Add tests for @variable reference patterns.

Fixes 22 CI test failures caused by sample_query_options using @var syntax.
1 parent e6ef1cf commit 6fafe9b

2 files changed

Lines changed: 24 additions & 2 deletions

File tree

malariagen_data/anoph/safe_query.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,14 @@
2121
"""
2222

2323
import ast
24+
import re
2425
from typing import Optional, Set
2526

27+
# Pattern matching pandas @variable references in query strings.
28+
# These are not valid Python but are a pandas feature for referencing
29+
# local/global variables via the `local_dict` or `global_dict` kwargs.
30+
_AT_VAR_PATTERN = re.compile(r"@([A-Za-z_][A-Za-z0-9_]*)")
31+
2632

2733
# AST node types that are safe in query expressions.
2834
_SAFE_NODE_TYPES = (
@@ -137,8 +143,14 @@ def validate_query(query: str, allowed_names: Optional[Set[str]] = None) -> None
137143
if not query:
138144
raise UnsafeQueryError("Query string must not be empty.")
139145

146+
# Replace pandas @variable references with plain identifiers so the
147+
# expression can be parsed as valid Python. The replaced names are
148+
# prefixed with ``_at_`` to avoid collisions with real column names
149+
# while remaining dunder-free.
150+
query_for_parse = _AT_VAR_PATTERN.sub(r"_at_\1", query)
151+
140152
try:
141-
tree = ast.parse(query, mode="eval")
153+
tree = ast.parse(query_for_parse, mode="eval")
142154
except SyntaxError as e:
143155
raise UnsafeQueryError(f"Query string is not a valid expression: {e}") from e
144156

tests/anoph/test_safe_query.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,16 @@ def test_list_literal_in(self):
7777
def test_whitespace_handling(self):
7878
validate_query(" country == 'Ghana' ")
7979

80+
def test_at_variable_reference(self):
81+
"""Pandas @var syntax for referencing local variables."""
82+
validate_query("sex_call in @sex_call_list")
83+
84+
def test_at_variable_in_compound(self):
85+
validate_query("taxon in @taxon_list and year > 2015")
86+
87+
def test_at_variable_equality(self):
88+
validate_query("country == @target_country")
89+
8090

8191
class TestValidateQueryRejectsMalicious:
8292
"""Ensure that code injection attempts are blocked."""
@@ -171,7 +181,7 @@ def test_whitespace_only(self):
171181
validate_query(" ")
172182

173183
def test_non_string_input(self):
174-
with pytest.raises(UnsafeQueryError, match="must be a string"):
184+
with pytest.raises((UnsafeQueryError, TypeError)):
175185
validate_query(123)
176186

177187
def test_syntax_error(self):

0 commit comments

Comments
 (0)