Skip to content

Commit 5145a6b

Browse files
committed
docs: enhance DataFrame method docstrings to clarify predicate requirements and provide usage examples
1 parent 347111c commit 5145a6b

1 file changed

Lines changed: 39 additions & 11 deletions

File tree

python/datafusion/dataframe.py

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -435,9 +435,17 @@ def filter(self, *predicates: Expr) -> DataFrame:
435435
"""Return a DataFrame for which ``predicate`` evaluates to ``True``.
436436
437437
Rows for which ``predicate`` evaluates to ``False`` or ``None`` are filtered
438-
out. If more than one predicate is provided, these predicates will be
439-
combined as a logical AND. If more complex logic is required, see the
440-
logical operations in :py:mod:`~datafusion.functions`.
438+
out. If more than one predicate is provided, these predicates will be
439+
combined as a logical AND. Each ``predicate`` must be an
440+
:class:`~datafusion.expr.Expr` created using helper functions such as
441+
:func:`datafusion.col` or :func:`datafusion.lit`; plain strings are not
442+
accepted. If more complex logic is required, see the logical operations in
443+
:py:mod:`~datafusion.functions`.
444+
445+
Example::
446+
447+
from datafusion import col, lit
448+
df.filter(col("a") > lit(1))
441449
442450
Args:
443451
predicates: Predicate expression(s) to filter the DataFrame.
@@ -453,6 +461,15 @@ def filter(self, *predicates: Expr) -> DataFrame:
453461
def with_column(self, name: str, expr: Expr) -> DataFrame:
454462
"""Add an additional column to the DataFrame.
455463
464+
The ``expr`` must be an :class:`~datafusion.expr.Expr` constructed with
465+
:func:`datafusion.col` or :func:`datafusion.lit`; plain strings are not
466+
accepted.
467+
468+
Example::
469+
470+
from datafusion import col, lit
471+
df.with_column("b", col("a") + lit(1))
472+
456473
Args:
457474
name: Name of the column to add.
458475
expr: Expression to compute the column.
@@ -467,16 +484,20 @@ def with_columns(
467484
) -> DataFrame:
468485
"""Add columns to the DataFrame.
469486
470-
By passing expressions, iteratables of expressions, or named expressions. To
471-
pass named expressions use the form name=Expr.
487+
By passing expressions, iterables of expressions, or named expressions.
488+
All expressions must be :class:`~datafusion.expr.Expr` objects created via
489+
:func:`datafusion.col` or :func:`datafusion.lit`; plain strings are not
490+
accepted. To pass named expressions use the form ``name=Expr``.
472491
473-
Example usage: The following will add 4 columns labeled a, b, c, and d::
492+
Example usage: The following will add 4 columns labeled ``a``, ``b``, ``c``,
493+
and ``d``::
474494
495+
from datafusion import col, lit
475496
df = df.with_columns(
476-
lit(0).alias('a'),
477-
[lit(1).alias('b'), lit(2).alias('c')],
497+
col("x").alias("a"),
498+
[lit(1).alias("b"), col("y").alias("c")],
478499
d=lit(3)
479-
)
500+
)
480501
481502
Args:
482503
exprs: Either a single expression or an iterable of expressions to add.
@@ -779,8 +800,15 @@ def join_on(
779800
) -> DataFrame:
780801
"""Join two :py:class:`DataFrame` using the specified expressions.
781802
782-
On expressions are used to support in-equality predicates. Equality
783-
predicates are correctly optimized
803+
Join predicates must be :class:`~datafusion.expr.Expr` objects, typically
804+
built with :func:`datafusion.col`; plain strings are not accepted. On
805+
expressions are used to support inequality predicates. Equality predicates
806+
are correctly optimized.
807+
808+
Example::
809+
810+
from datafusion import col
811+
df.join_on(other_df, col("id") == col("other_id"))
784812
785813
Args:
786814
right: Other DataFrame to join with.

0 commit comments

Comments
 (0)