Skip to content

Commit 3585c11

Browse files
authored
minor: remove deprecated interfaces (#1481)
* udf module has been deprecated since DF47. html_formatter module has been deprecated since DF48.
* database has been deprecated since DF48
* select_columns has been deprecated since DF43
* unnest_column has been deprecated since DF42
* display_name has been deprecated since DF42
* window() has been deprecated since DF50
* serde functions have been deprecated since DF42
* from_arrow_table and tables have been deprecated since DF42
* RuntimeConfig has been deprecated since DF44
* Update user documentation to remove deprecated function
* update tpch examples for latest function uses
* Remove unnecessary options in example
* update rendering for the most recent dataframe_formatter instead of the deprecated html_formatter
1 parent ecd14c1 commit 3585c11

21 files changed

+154
-526
lines changed

crates/core/src/context.rs

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,21 +1072,6 @@ impl PySessionContext {
10721072
self.ctx.catalog_names().into_iter().collect()
10731073
}
10741074

1075-
pub fn tables(&self) -> HashSet<String> {
1076-
self.ctx
1077-
.catalog_names()
1078-
.into_iter()
1079-
.filter_map(|name| self.ctx.catalog(&name))
1080-
.flat_map(move |catalog| {
1081-
catalog
1082-
.schema_names()
1083-
.into_iter()
1084-
.filter_map(move |name| catalog.schema(&name))
1085-
})
1086-
.flat_map(|schema| schema.table_names())
1087-
.collect()
1088-
}
1089-
10901075
pub fn table(&self, name: &str, py: Python) -> PyResult<PyDataFrame> {
10911076
let res = wait_for_future(py, self.ctx.table(name))
10921077
.map_err(|e| PyKeyError::new_err(e.to_string()))?;

crates/core/src/dataframe.rs

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -468,17 +468,17 @@ impl PyDataFrame {
468468
fn __getitem__(&self, key: Bound<'_, PyAny>) -> PyDataFusionResult<Self> {
469469
if let Ok(key) = key.extract::<PyBackedStr>() {
470470
// df[col]
471-
self.select_columns(vec![key])
471+
self.select_exprs(vec![key])
472472
} else if let Ok(tuple) = key.cast::<PyTuple>() {
473473
// df[col1, col2, col3]
474474
let keys = tuple
475475
.iter()
476476
.map(|item| item.extract::<PyBackedStr>())
477477
.collect::<PyResult<Vec<PyBackedStr>>>()?;
478-
self.select_columns(keys)
478+
self.select_exprs(keys)
479479
} else if let Ok(keys) = key.extract::<Vec<PyBackedStr>>() {
480480
// df[[col1, col2, col3]]
481-
self.select_columns(keys)
481+
self.select_exprs(keys)
482482
} else {
483483
let message = "DataFrame can only be indexed by string index or indices".to_string();
484484
Err(PyDataFusionError::Common(message))
@@ -554,13 +554,6 @@ impl PyDataFrame {
554554
Ok(PyTable::from(table_provider))
555555
}
556556

557-
#[pyo3(signature = (*args))]
558-
fn select_columns(&self, args: Vec<PyBackedStr>) -> PyDataFusionResult<Self> {
559-
let args = args.iter().map(|s| s.as_ref()).collect::<Vec<&str>>();
560-
let df = self.df.as_ref().clone().select_columns(&args)?;
561-
Ok(Self::new(df))
562-
}
563-
564557
#[pyo3(signature = (*args))]
565558
fn select_exprs(&self, args: Vec<PyBackedStr>) -> PyDataFusionResult<Self> {
566559
let args = args.iter().map(|s| s.as_ref()).collect::<Vec<&str>>();
@@ -890,22 +883,6 @@ impl PyDataFrame {
890883
Ok(Self::new(new_df))
891884
}
892885

893-
#[pyo3(signature = (column, preserve_nulls=true, recursions=None))]
894-
fn unnest_column(
895-
&self,
896-
column: &str,
897-
preserve_nulls: bool,
898-
recursions: Option<Vec<(String, String, usize)>>,
899-
) -> PyDataFusionResult<Self> {
900-
let unnest_options = build_unnest_options(preserve_nulls, recursions);
901-
let df = self
902-
.df
903-
.as_ref()
904-
.clone()
905-
.unnest_columns_with_options(&[column], unnest_options)?;
906-
Ok(Self::new(df))
907-
}
908-
909886
#[pyo3(signature = (columns, preserve_nulls=true, recursions=None))]
910887
fn unnest_columns(
911888
&self,

crates/core/src/functions.rs

Lines changed: 3 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,14 @@
1818
use std::collections::HashMap;
1919

2020
use datafusion::common::{Column, ScalarValue, TableReference};
21-
use datafusion::execution::FunctionRegistry;
22-
use datafusion::functions_aggregate::all_default_aggregate_functions;
23-
use datafusion::functions_window::all_default_window_functions;
24-
use datafusion::logical_expr::expr::{
25-
Alias, FieldMetadata, NullTreatment as DFNullTreatment, WindowFunction, WindowFunctionParams,
26-
};
27-
use datafusion::logical_expr::{Expr, ExprFunctionExt, WindowFrame, WindowFunctionDefinition, lit};
21+
use datafusion::logical_expr::expr::{Alias, FieldMetadata, NullTreatment as DFNullTreatment};
22+
use datafusion::logical_expr::{Expr, ExprFunctionExt, lit};
2823
use datafusion::{functions, functions_aggregate, functions_window};
2924
use pyo3::prelude::*;
3025
use pyo3::wrap_pyfunction;
3126

3227
use crate::common::data_type::{NullTreatment, PyScalarValue};
33-
use crate::context::PySessionContext;
34-
use crate::errors::{PyDataFusionError, PyDataFusionResult};
28+
use crate::errors::PyDataFusionResult;
3529
use crate::expr::PyExpr;
3630
use crate::expr::conditional_expr::PyCaseBuilder;
3731
use crate::expr::sort_expr::{PySortExpr, to_sort_expressions};
@@ -306,126 +300,6 @@ fn when(when: PyExpr, then: PyExpr) -> PyResult<PyCaseBuilder> {
306300
Ok(PyCaseBuilder::new(None).when(when, then))
307301
}
308302

309-
/// Helper function to find the appropriate window function.
310-
///
311-
/// Search procedure:
312-
/// 1) Search built in window functions, which are being deprecated.
313-
/// 1) If a session context is provided:
314-
/// 1) search User Defined Aggregate Functions (UDAFs)
315-
/// 1) search registered window functions
316-
/// 1) search registered aggregate functions
317-
/// 1) If no function has been found, search default aggregate functions.
318-
///
319-
/// NOTE: we search the built-ins first because the `UDAF` versions currently do not have the same behavior.
320-
fn find_window_fn(
321-
name: &str,
322-
ctx: Option<PySessionContext>,
323-
) -> PyDataFusionResult<WindowFunctionDefinition> {
324-
if let Some(ctx) = ctx {
325-
// search UDAFs
326-
let udaf = ctx
327-
.ctx
328-
.udaf(name)
329-
.map(WindowFunctionDefinition::AggregateUDF)
330-
.ok();
331-
332-
if let Some(udaf) = udaf {
333-
return Ok(udaf);
334-
}
335-
336-
let session_state = ctx.ctx.state();
337-
338-
// search registered window functions
339-
let window_fn = session_state
340-
.window_functions()
341-
.get(name)
342-
.map(|f| WindowFunctionDefinition::WindowUDF(f.clone()));
343-
344-
if let Some(window_fn) = window_fn {
345-
return Ok(window_fn);
346-
}
347-
348-
// search registered aggregate functions
349-
let agg_fn = session_state
350-
.aggregate_functions()
351-
.get(name)
352-
.map(|f| WindowFunctionDefinition::AggregateUDF(f.clone()));
353-
354-
if let Some(agg_fn) = agg_fn {
355-
return Ok(agg_fn);
356-
}
357-
}
358-
359-
// search default aggregate functions
360-
let agg_fn = all_default_aggregate_functions()
361-
.iter()
362-
.find(|v| v.name() == name || v.aliases().contains(&name.to_string()))
363-
.map(|f| WindowFunctionDefinition::AggregateUDF(f.clone()));
364-
365-
if let Some(agg_fn) = agg_fn {
366-
return Ok(agg_fn);
367-
}
368-
369-
// search default window functions
370-
let window_fn = all_default_window_functions()
371-
.iter()
372-
.find(|v| v.name() == name || v.aliases().contains(&name.to_string()))
373-
.map(|f| WindowFunctionDefinition::WindowUDF(f.clone()));
374-
375-
if let Some(window_fn) = window_fn {
376-
return Ok(window_fn);
377-
}
378-
379-
Err(PyDataFusionError::Common(format!(
380-
"window function `{name}` not found"
381-
)))
382-
}
383-
384-
/// Creates a new Window function expression
385-
#[allow(clippy::too_many_arguments)]
386-
#[pyfunction]
387-
#[pyo3(signature = (name, args, partition_by=None, order_by=None, window_frame=None, filter=None, distinct=false, ctx=None))]
388-
fn window(
389-
name: &str,
390-
args: Vec<PyExpr>,
391-
partition_by: Option<Vec<PyExpr>>,
392-
order_by: Option<Vec<PySortExpr>>,
393-
window_frame: Option<PyWindowFrame>,
394-
filter: Option<PyExpr>,
395-
distinct: bool,
396-
ctx: Option<PySessionContext>,
397-
) -> PyResult<PyExpr> {
398-
let fun = find_window_fn(name, ctx)?;
399-
400-
let window_frame = window_frame
401-
.map(|w| w.into())
402-
.unwrap_or(WindowFrame::new(order_by.as_ref().map(|v| !v.is_empty())));
403-
let filter = filter.map(|f| f.expr.into());
404-
405-
Ok(PyExpr {
406-
expr: datafusion::logical_expr::Expr::WindowFunction(Box::new(WindowFunction {
407-
fun,
408-
params: WindowFunctionParams {
409-
args: args.into_iter().map(|x| x.expr).collect::<Vec<_>>(),
410-
partition_by: partition_by
411-
.unwrap_or_default()
412-
.into_iter()
413-
.map(|x| x.expr)
414-
.collect::<Vec<_>>(),
415-
order_by: order_by
416-
.unwrap_or_default()
417-
.into_iter()
418-
.map(|x| x.into())
419-
.collect::<Vec<_>>(),
420-
window_frame,
421-
filter,
422-
distinct,
423-
null_treatment: None,
424-
},
425-
})),
426-
})
427-
}
428-
429303
// Generates a [pyo3] wrapper for associated aggregate functions.
430304
// All of the builder options are exposed to the python internal
431305
// function and we rely on the wrappers to only use those that
@@ -1186,7 +1060,6 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
11861060
m.add_wrapped(wrap_pyfunction!(self::uuid))?; // Use self to avoid name collision
11871061
m.add_wrapped(wrap_pyfunction!(var_pop))?;
11881062
m.add_wrapped(wrap_pyfunction!(var_sample))?;
1189-
m.add_wrapped(wrap_pyfunction!(window))?;
11901063
m.add_wrapped(wrap_pyfunction!(regr_avgx))?;
11911064
m.add_wrapped(wrap_pyfunction!(regr_avgy))?;
11921065
m.add_wrapped(wrap_pyfunction!(regr_count))?;

docs/source/user-guide/common-operations/windows.rst

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,7 @@ it's ``Type 2`` column that are null.
175175
Aggregate Functions
176176
-------------------
177177

178-
You can use any :ref:`Aggregation Function<aggregation>` as a window function. Currently
179-
aggregate functions must use the deprecated
180-
:py:func:`datafusion.functions.window` API but this should be resolved in
181-
DataFusion 42.0 (`Issue Link <https://github.com/apache/datafusion-python/issues/833>`_). Here
178+
You can use any :ref:`Aggregation Function<aggregation>` as a window function. Here
182179
is an example that shows how to compare each pokemon’s attack power with the average attack
183180
power in its ``"Type 1"`` using the :py:func:`datafusion.functions.avg` function.
184181

@@ -189,10 +186,12 @@ power in its ``"Type 1"`` using the :py:func:`datafusion.functions.avg` function
189186
col('"Name"'),
190187
col('"Attack"'),
191188
col('"Type 1"'),
192-
f.window("avg", [col('"Attack"')])
193-
.partition_by(col('"Type 1"'))
194-
.build()
195-
.alias("Average Attack"),
189+
f.avg(col('"Attack"')).over(
190+
Window(
191+
window_frame=WindowFrame("rows", None, None),
192+
partition_by=[col('"Type 1"')],
193+
)
194+
).alias("Average Attack"),
196195
)
197196
198197
Available Functions

0 commit comments

Comments
 (0)