Skip to content

Commit 89a0df4

Browse files
committed
refactor: improve expression handling by adding _to_raw_expr function and updating _ensure_expr
1 parent 5b456ea commit 89a0df4

2 files changed

Lines changed: 37 additions & 27 deletions

File tree

python/datafusion/dataframe.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@
4444
EXPR_TYPE_ERROR,
4545
Expr,
4646
SortKey,
47+
_to_raw_expr,
4748
expr_list_to_raw_expr_list,
4849
sort_list_to_raw_sort_list,
4950
)
@@ -292,8 +293,12 @@ def __init__(
292293
self.bloom_filter_ndv = bloom_filter_ndv
293294

294295

295-
def _ensure_expr(value: Expr) -> expr_internal.Expr:
296-
"""Return the internal expression or raise ``TypeError`` if invalid.
296+
def _ensure_expr(value: Expr | str) -> expr_internal.Expr:
297+
"""Return the internal expression from ``Expr`` or raise ``TypeError``.
298+
299+
This helper rejects plain strings so higher level APIs consistently
300+
require explicit :func:`~datafusion.col` or :func:`~datafusion.lit`
301+
expressions.
297302
298303
Args:
299304
value: Candidate expression.
@@ -306,7 +311,7 @@ def _ensure_expr(value: Expr) -> expr_internal.Expr:
306311
"""
307312
if not isinstance(value, Expr):
308313
raise TypeError(EXPR_TYPE_ERROR)
309-
return value.expr
314+
return _to_raw_expr(value)
310315

311316

312317
class DataFrame:

python/datafusion/expr.py

Lines changed: 29 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -222,6 +222,29 @@
222222
]
223223

224224

225+
def _to_raw_expr(value: Expr | str) -> expr_internal.Expr:
226+
"""Convert a Python expression or column name to its raw variant.
227+
228+
Args:
229+
value: Candidate expression or column name.
230+
231+
Returns:
232+
The internal :class:`~datafusion._internal.expr.Expr` representation.
233+
234+
Raises:
235+
TypeError: If ``value`` is neither an :class:`Expr` nor ``str``.
236+
"""
237+
if isinstance(value, str):
238+
return Expr.column(value).expr
239+
if isinstance(value, Expr):
240+
return value.expr
241+
error = (
242+
"Expected Expr or column name, found:"
243+
f" {type(value).__name__}. {EXPR_TYPE_ERROR}."
244+
)
245+
raise TypeError(error)
246+
247+
225248
def expr_list_to_raw_expr_list(
226249
expr_list: Optional[Sequence[Expr | str] | Expr | str],
227250
) -> Optional[list[expr_internal.Expr]]:
@@ -230,23 +253,11 @@ def expr_list_to_raw_expr_list(
230253
expr_list = [expr_list]
231254
if expr_list is None:
232255
return None
233-
raw_exprs: list[expr_internal.Expr] = []
234-
for e in expr_list:
235-
if isinstance(e, str):
236-
raw_exprs.append(Expr.column(e).expr)
237-
elif isinstance(e, Expr):
238-
raw_exprs.append(e.expr)
239-
else:
240-
error = (
241-
"Expected Expr or column name, found:"
242-
f" {type(e).__name__}. {EXPR_TYPE_ERROR}."
243-
)
244-
raise TypeError(error)
245-
return raw_exprs
256+
return [_to_raw_expr(e) for e in expr_list]
246257

247258

248259
def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr:
249-
"""Helper function to return a default Sort if an Expr is provided."""
260+
"""Return a :class:`SortExpr`, defaulting attributes when necessary."""
250261
if isinstance(e, SortExpr):
251262
return e.raw_sort
252263
return SortExpr(e, ascending=True, nulls_first=True).raw_sort
@@ -262,17 +273,11 @@ def sort_list_to_raw_sort_list(
262273
return None
263274
raw_sort_list = []
264275
for item in sort_list:
265-
if isinstance(item, str):
266-
expr_obj = Expr.column(item)
267-
elif isinstance(item, (Expr, SortExpr)):
268-
expr_obj = item
276+
if isinstance(item, SortExpr):
277+
raw_sort_list.append(sort_or_default(item))
269278
else:
270-
error = (
271-
"Expected Expr or column name, found:"
272-
f" {type(item).__name__}. {EXPR_TYPE_ERROR}."
273-
)
274-
raise TypeError(error)
275-
raw_sort_list.append(sort_or_default(expr_obj))
279+
raw_expr = _to_raw_expr(item) # may raise ``TypeError``
280+
raw_sort_list.append(sort_or_default(Expr(raw_expr)))
276281
return raw_sort_list
277282

278283

0 commit comments

Comments
 (0)