Skip to content

Commit 8a9b171

Browse files
committed
Refactor expression type error handling in DataFrame and expr modules for improved clarity and consistency
1 parent 5bdb436 commit 8a9b171

2 files changed

Lines changed: 21 additions & 18 deletions

File tree

python/datafusion/dataframe.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,13 @@
4040
from datafusion._internal import DataFrame as DataFrameInternal
4141
from datafusion._internal import ParquetColumnOptions as ParquetColumnOptionsInternal
4242
from datafusion._internal import ParquetWriterOptions as ParquetWriterOptionsInternal
43-
from datafusion.expr import Expr, SortExpr, expr_list_to_raw_expr_list, sort_or_default
43+
from datafusion.expr import (
44+
_EXPR_TYPE_ERROR,
45+
Expr,
46+
SortExpr,
47+
expr_list_to_raw_expr_list,
48+
sort_or_default,
49+
)
4450
from datafusion.plan import ExecutionPlan, LogicalPlan
4551
from datafusion.record_batch import RecordBatchStream
4652

@@ -424,10 +430,9 @@ def filter(self, *predicates: Expr) -> DataFrame:
424430
"""
425431
df = self.df
426432
for p in predicates:
427-
if not isinstance(p, Expr):
428-
error = "Use col() or lit() to construct expressions"
429-
raise TypeError(error)
430-
df = df.filter(p.expr)
433+
if isinstance(p, str) or not isinstance(p, Expr):
434+
raise TypeError(_EXPR_TYPE_ERROR)
435+
df = df.filter(expr_list_to_raw_expr_list(p)[0])
431436
return DataFrame(df)
432437

433438
def with_column(self, name: str, expr: Expr) -> DataFrame:
@@ -441,8 +446,7 @@ def with_column(self, name: str, expr: Expr) -> DataFrame:
441446
DataFrame with the new column.
442447
"""
443448
if not isinstance(expr, Expr):
444-
error = "Use col() or lit() to construct expressions"
445-
raise TypeError(error)
449+
raise TypeError(_EXPR_TYPE_ERROR)
446450
return DataFrame(self.df.with_column(name, expr.expr))
447451

448452
def with_columns(
@@ -479,17 +483,14 @@ def _simplify_expression(
479483
elif isinstance(expr, Iterable) and not isinstance(expr, (str, Expr)):
480484
for inner_expr in expr:
481485
if not isinstance(inner_expr, Expr):
482-
error = "Use col() or lit() to construct expressions"
483-
raise TypeError(error)
486+
raise TypeError(_EXPR_TYPE_ERROR)
484487
expr_list.append(inner_expr.expr)
485488
else:
486-
error = "Use col() or lit() to construct expressions"
487-
raise TypeError(error)
489+
raise TypeError(_EXPR_TYPE_ERROR)
488490
if named_exprs:
489491
for alias, expr in named_exprs.items():
490492
if not isinstance(expr, Expr):
491-
error = "Use col() or lit() to construct expressions"
492-
raise TypeError(error)
493+
raise TypeError(_EXPR_TYPE_ERROR)
493494
expr_list.append(expr.alias(alias).expr)
494495
return expr_list
495496

@@ -535,8 +536,7 @@ def aggregate(
535536
aggs_exprs = []
536537
for agg in aggs_list:
537538
if not isinstance(agg, Expr):
538-
error = "Use col() or lit() to construct expressions"
539-
raise TypeError(error)
539+
raise TypeError(_EXPR_TYPE_ERROR)
540540
aggs_exprs.append(agg.expr)
541541
return DataFrame(self.df.aggregate(group_by_exprs, aggs_exprs))
542542

@@ -786,8 +786,7 @@ def join_on(
786786
exprs = []
787787
for expr in on_exprs:
788788
if not isinstance(expr, Expr):
789-
error = "Use col() or lit() to construct expressions"
790-
raise TypeError(error)
789+
raise TypeError(_EXPR_TYPE_ERROR)
791790
exprs.append(expr.expr)
792791
return DataFrame(self.df.join_on(right.df, exprs, how))
793792

python/datafusion/expr.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@
3939
if TYPE_CHECKING:
4040
from datafusion.plan import LogicalPlan
4141

42+
43+
# Standard error message for invalid expression types
44+
_EXPR_TYPE_ERROR = "Use col() or lit() to construct expressions"
45+
4246
# The following are imported from the internal representation. We may choose to
4347
# give these all proper wrappers, or to simply leave as is. These were added
4448
# in order to support passing the `test_imports` unit test.
@@ -232,7 +236,7 @@ def expr_list_to_raw_expr_list(
232236
else:
233237
error = (
234238
"Expected Expr or column name, found:"
235-
f" {type(e).__name__}. Use col() or lit() to construct expressions."
239+
f" {type(e).__name__}. {_EXPR_TYPE_ERROR}."
236240
)
237241
raise TypeError(error)
238242
return raw_exprs

0 commit comments

Comments
 (0)