Skip to content

Commit 8349ca3

Browse files
committed
refactor: enhance expression handling by introducing _to_raw_expr function and updating _ensure_expr
1 parent 5145a6b · commit 8349ca3

2 files changed

Lines changed: 34 additions & 30 deletions

File tree

python/datafusion/dataframe.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@
4444
EXPR_TYPE_ERROR,
4545
Expr,
4646
SortKey,
47+
_to_raw_expr,
4748
expr_list_to_raw_expr_list,
4849
sort_list_to_raw_sort_list,
4950
)
@@ -292,21 +293,19 @@ def __init__(
292293
self.bloom_filter_ndv = bloom_filter_ndv
293294

294295

295-
def _ensure_expr(value: Expr) -> expr_internal.Expr:
296+
def _ensure_expr(value: Expr | str) -> expr_internal.Expr:
296297
"""Return the internal expression or raise ``TypeError`` if invalid.
297298
298299
Args:
299-
value: Candidate expression.
300+
value: Candidate expression or column name.
300301
301302
Returns:
302303
The internal expression representation.
303304
304305
Raises:
305-
TypeError: If ``value`` is not an instance of :class:`Expr`.
306+
TypeError: If ``value`` is not an instance of :class:`Expr` or ``str``.
306307
"""
307-
if not isinstance(value, Expr):
308-
raise TypeError(EXPR_TYPE_ERROR)
309-
return value.expr
308+
return _to_raw_expr(value)
310309

311310

312311
class DataFrame:

python/datafusion/expr.py

Lines changed: 29 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -222,6 +222,29 @@
222222
]
223223

224224

225+
def _to_raw_expr(value: Expr | str) -> expr_internal.Expr:
226+
"""Convert a Python expression or column name to its raw variant.
227+
228+
Args:
229+
value: Candidate expression or column name.
230+
231+
Returns:
232+
The internal :class:`~datafusion._internal.expr.Expr` representation.
233+
234+
Raises:
235+
TypeError: If ``value`` is neither an :class:`Expr` nor ``str``.
236+
"""
237+
if isinstance(value, str):
238+
return Expr.column(value).expr
239+
if isinstance(value, Expr):
240+
return value.expr
241+
error = (
242+
"Expected Expr or column name, found:"
243+
f" {type(value).__name__}. {EXPR_TYPE_ERROR}."
244+
)
245+
raise TypeError(error)
246+
247+
225248
def expr_list_to_raw_expr_list(
226249
expr_list: Optional[Sequence[Expr | str] | Expr | str],
227250
) -> Optional[list[expr_internal.Expr]]:
@@ -230,23 +253,11 @@ def expr_list_to_raw_expr_list(
230253
expr_list = [expr_list]
231254
if expr_list is None:
232255
return None
233-
raw_exprs: list[expr_internal.Expr] = []
234-
for e in expr_list:
235-
if isinstance(e, str):
236-
raw_exprs.append(Expr.column(e).expr)
237-
elif isinstance(e, Expr):
238-
raw_exprs.append(e.expr)
239-
else:
240-
error = (
241-
"Expected Expr or column name, found:"
242-
f" {type(e).__name__}. {EXPR_TYPE_ERROR}."
243-
)
244-
raise TypeError(error)
245-
return raw_exprs
256+
return [_to_raw_expr(e) for e in expr_list]
246257

247258

248259
def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr:
249-
"""Helper function to return a default Sort if an Expr is provided."""
260+
"""Return a :class:`SortExpr`, defaulting attributes when necessary."""
250261
if isinstance(e, SortExpr):
251262
return e.raw_sort
252263
return SortExpr(e, ascending=True, nulls_first=True).raw_sort
@@ -262,17 +273,11 @@ def sort_list_to_raw_sort_list(
262273
return None
263274
raw_sort_list = []
264275
for item in sort_list:
265-
if isinstance(item, str):
266-
expr_obj = Expr.column(item)
267-
elif isinstance(item, (Expr, SortExpr)):
268-
expr_obj = item
276+
if isinstance(item, SortExpr):
277+
raw_sort_list.append(sort_or_default(item))
269278
else:
270-
error = (
271-
"Expected Expr or column name, found:"
272-
f" {type(item).__name__}. {EXPR_TYPE_ERROR}."
273-
)
274-
raise TypeError(error)
275-
raw_sort_list.append(sort_or_default(expr_obj))
279+
raw_expr = _to_raw_expr(item) # may raise ``TypeError``
280+
raw_sort_list.append(sort_or_default(Expr(raw_expr)))
276281
return raw_sort_list
277282

278283

0 commit comments

Comments (0)