Skip to content

Commit c720298

Browse files
committed
revert branch UNPICK
1 parent 0daf438 commit c720298

3 files changed

Lines changed: 12 additions & 81 deletions

File tree

docs/source/user-guide/dataframe/index.rst

Lines changed: 0 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -126,49 +126,6 @@ DataFusion's DataFrame API offers a wide range of operations:
126126
# Drop columns
127127
df = df.drop("temporary_column")
128128
129-
String Columns and Expressions
130-
------------------------------
131-
132-
Some ``DataFrame`` methods accept plain strings when an argument refers to an
133-
existing column. These include:
134-
135-
* :py:meth:`~datafusion.DataFrame.select`
136-
* :py:meth:`~datafusion.DataFrame.sort`
137-
* :py:meth:`~datafusion.DataFrame.drop`
138-
* :py:meth:`~datafusion.DataFrame.join` (``on`` argument)
139-
* :py:meth:`~datafusion.DataFrame.aggregate` (grouping columns)
140-
141-
For such methods, you can pass column names directly:
142-
143-
.. code-block:: python
144-
145-
from datafusion import col, functions as f
146-
147-
df.sort('id')
148-
df.aggregate('id', [f.count(col('value'))])
149-
150-
The same operation can also be written with an explicit column expression:
151-
152-
.. code-block:: python
153-
154-
from datafusion import col, functions as f
155-
156-
df.sort(col('id'))
157-
df.aggregate(col('id'), [f.count(col('value'))])
158-
159-
Whenever an argument represents an expression—such as in
160-
:py:meth:`~datafusion.DataFrame.filter` or
161-
:py:meth:`~datafusion.DataFrame.with_column`—use ``col()`` to reference columns
162-
and wrap constant values with ``lit()`` (also available as ``literal()``):
163-
164-
.. code-block:: python
165-
166-
from datafusion import col, lit
167-
df.filter(col('age') > lit(21))
168-
169-
Without ``lit()`` DataFusion would treat ``21`` as a column name rather than a
170-
constant value.
171-
172129
Terminal Operations
173130
-------------------
174131

python/datafusion/dataframe.py

Lines changed: 12 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -426,12 +426,6 @@ def filter(self, *predicates: Expr) -> DataFrame:
426426
"""
427427
df = self.df
428428
for p in predicates:
429-
if not isinstance(p, Expr):
430-
msg = (
431-
f"Expected Expr, got {type(p).__name__}. "
432-
"Use col() or lit() to construct expressions."
433-
)
434-
raise TypeError(msg)
435429
df = df.filter(p.expr)
436430
return DataFrame(df)
437431

@@ -509,44 +503,37 @@ def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame:
509503
return DataFrame(self.df.with_column_renamed(old_name, new_name))
510504

511505
def aggregate(
512-
self,
513-
group_by: list[Expr | str] | Expr | str,
514-
aggs: list[Expr] | Expr,
506+
self, group_by: list[Expr] | Expr, aggs: list[Expr] | Expr
515507
) -> DataFrame:
516508
"""Aggregates the rows of the current DataFrame.
517509
518510
Args:
519-
group_by: List of expressions or column names to group by.
511+
group_by: List of expressions to group by.
520512
aggs: List of expressions to aggregate.
521513
522514
Returns:
523515
DataFrame after aggregation.
524516
"""
525-
group_by_list = group_by if isinstance(group_by, list) else [group_by]
526-
aggs_list = aggs if isinstance(aggs, list) else [aggs]
517+
group_by = group_by if isinstance(group_by, list) else [group_by]
518+
aggs = aggs if isinstance(aggs, list) else [aggs]
527519

528-
group_by_exprs = [
529-
Expr.column(e).expr if isinstance(e, str) else e.expr for e in group_by_list
530-
]
531-
aggs_exprs = [e.expr for e in aggs_list]
532-
return DataFrame(self.df.aggregate(group_by_exprs, aggs_exprs))
520+
group_by = [e.expr for e in group_by]
521+
aggs = [e.expr for e in aggs]
522+
return DataFrame(self.df.aggregate(group_by, aggs))
533523

534-
def sort(self, *exprs: Expr | SortExpr | str) -> DataFrame:
535-
"""Sort the DataFrame by the specified sorting expressions or column names.
524+
def sort(self, *exprs: Expr | SortExpr) -> DataFrame:
525+
"""Sort the DataFrame by the specified sorting expressions.
536526
537527
Note that any expression can be turned into a sort expression by
538-
calling its ``sort`` method.
528+
calling its ``sort`` method.
539529
540530
Args:
541-
exprs: Sort expressions or column names, applied in order.
531+
exprs: Sort expressions, applied in order.
542532
543533
Returns:
544534
DataFrame after sorting.
545535
"""
546-
exprs_raw = [
547-
sort_or_default(Expr.column(expr) if isinstance(expr, str) else expr)
548-
for expr in exprs
549-
]
536+
exprs_raw = [sort_or_default(expr) for expr in exprs]
550537
return DataFrame(self.df.sort(*exprs_raw))
551538

552539
def cast(self, mapping: dict[str, pa.DataType[Any]]) -> DataFrame:

python/tests/test_dataframe.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -268,19 +268,6 @@ def test_sort(df):
268268
assert table.to_pydict() == expected
269269

270270

271-
def test_sort_string_and_expression_equivalent(df):
272-
from datafusion import col
273-
274-
result_str = df.sort("a").to_pydict()
275-
result_expr = df.sort(col("a")).to_pydict()
276-
assert result_str == result_expr
277-
278-
279-
def test_filter_string_unsupported(df):
280-
with pytest.raises(TypeError, match=r"col\(\) or lit\(\)"):
281-
df.filter("a > 1")
282-
283-
284271
def test_drop(df):
285272
df = df.drop("c")
286273

0 commit comments

Comments
 (0)