Skip to content

Commit 1e59164

Browse files
authored
Remove core dependency from ffi (#19422)
## Which issue does this PR close? Closes #18671 ## Rationale for this change With the latest changes for #18671 we no longer require the `datafusion` crate as a dependency. This will reduce build times for users. It also guarantees that we do not accidentally introduce code that will create a `SessionContext` or any other large binary inside our FFI implementations. ## What changes are included in this PR? - Remove the `datafusion` crate from `Cargo.toml` - Update paths - Apply consistent formatting ## Are these changes tested? Existing unit tests. ## Are there any user-facing changes? No. This only updates paths.
1 parent a886b9e commit 1e59164

35 files changed

Lines changed: 334 additions & 353 deletions

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/ffi/Cargo.toml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,19 +40,25 @@ workspace = true
4040
name = "datafusion_ffi"
4141
crate-type = ["cdylib", "rlib"]
4242

43+
# Note to developers: do *not* add `datafusion` as a dependency in this crate.
44+
# It increases build times and library binary size for users.
45+
4346
[dependencies]
4447
abi_stable = "0.11.3"
4548
arrow = { workspace = true, features = ["ffi"] }
4649
arrow-schema = { workspace = true }
4750
async-ffi = { version = "0.5.0", features = ["abi_stable"] }
4851
async-trait = { workspace = true }
49-
datafusion = { workspace = true, default-features = false }
5052
datafusion-catalog = { workspace = true }
5153
datafusion-common = { workspace = true }
5254
datafusion-datasource = { workspace = true }
5355
datafusion-execution = { workspace = true }
5456
datafusion-expr = { workspace = true }
57+
datafusion-functions = { workspace = true, optional = true }
58+
datafusion-functions-aggregate = { workspace = true, optional = true }
5559
datafusion-functions-aggregate-common = { workspace = true }
60+
datafusion-functions-table = { workspace = true, optional = true }
61+
datafusion-functions-window = { workspace = true, optional = true }
5662
datafusion-physical-expr = { workspace = true }
5763
datafusion-physical-expr-common = { workspace = true }
5864
datafusion-physical-plan = { workspace = true }
@@ -74,5 +80,10 @@ datafusion-functions-window = { workspace = true }
7480
doc-comment = { workspace = true }
7581

7682
[features]
77-
integration-tests = []
83+
integration-tests = [
84+
"datafusion-functions",
85+
"datafusion-functions-aggregate",
86+
"datafusion-functions-table",
87+
"datafusion-functions-window",
88+
]
7889
tarpaulin_include = [] # Exists only to prevent warnings on stable and still have accurate coverage

datafusion/ffi/src/arrow_wrappers.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,10 @@
1818
use std::sync::Arc;
1919

2020
use abi_stable::StableAbi;
21-
use arrow::{
22-
array::{ArrayRef, make_array},
23-
datatypes::{Schema, SchemaRef},
24-
error::ArrowError,
25-
ffi::{FFI_ArrowArray, FFI_ArrowSchema, from_ffi, to_ffi},
26-
};
21+
use arrow::array::{ArrayRef, make_array};
22+
use arrow::datatypes::{Schema, SchemaRef};
23+
use arrow::error::ArrowError;
24+
use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema, from_ffi, to_ffi};
2725
use datafusion_common::{DataFusionError, ScalarValue};
2826
use log::error;
2927

datafusion/ffi/src/execution_plan.rs

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,24 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use std::{ffi::c_void, pin::Pin, sync::Arc};
19-
20-
use abi_stable::{
21-
StableAbi,
22-
std_types::{RString, RVec},
23-
};
24-
use datafusion::{
25-
error::DataFusionError,
26-
execution::{SendableRecordBatchStream, TaskContext},
27-
physical_plan::{DisplayAs, ExecutionPlan, PlanProperties},
18+
use std::ffi::c_void;
19+
use std::pin::Pin;
20+
use std::sync::Arc;
21+
22+
use abi_stable::StableAbi;
23+
use abi_stable::std_types::{RString, RVec};
24+
use datafusion_common::{DataFusionError, Result};
25+
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
26+
use datafusion_physical_plan::{
27+
DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties,
2828
};
29-
use datafusion::{error::Result, physical_plan::DisplayFormatType};
3029
use tokio::runtime::Handle;
3130

3231
use crate::execution::FFI_TaskContext;
32+
use crate::plan_properties::FFI_PlanProperties;
33+
use crate::record_batch_stream::FFI_RecordBatchStream;
3334
use crate::util::FFIResult;
34-
use crate::{
35-
df_result, plan_properties::FFI_PlanProperties,
36-
record_batch_stream::FFI_RecordBatchStream, rresult,
37-
};
35+
use crate::{df_result, rresult};
3836

3937
/// A stable struct for sharing a [`ExecutionPlan`] across FFI boundaries.
4038
#[repr(C)]
@@ -300,12 +298,11 @@ impl ExecutionPlan for ForeignExecutionPlan {
300298

301299
#[cfg(test)]
302300
pub(crate) mod tests {
303-
use super::*;
304301
use arrow::datatypes::{DataType, Field, Schema};
305-
use datafusion::physical_plan::{
306-
Partitioning,
307-
execution_plan::{Boundedness, EmissionType},
308-
};
302+
use datafusion::physical_plan::Partitioning;
303+
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
304+
305+
use super::*;
309306

310307
#[derive(Debug)]
311308
pub struct EmptyExec {

datafusion/ffi/src/expr/distribution.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,16 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::arrow_wrappers::WrappedArray;
19-
use crate::expr::interval::FFI_Interval;
2018
use abi_stable::StableAbi;
2119
use datafusion_common::DataFusionError;
2220
use datafusion_expr::statistics::{
2321
BernoulliDistribution, Distribution, ExponentialDistribution, GaussianDistribution,
2422
GenericDistribution, UniformDistribution,
2523
};
2624

25+
use crate::arrow_wrappers::WrappedArray;
26+
use crate::expr::interval::FFI_Interval;
27+
2728
/// A stable struct for sharing [`Distribution`] across FFI boundaries.
2829
/// See [`Distribution`] for the meaning of each variant.
2930
#[repr(C)]

datafusion/ffi/src/expr/interval.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::arrow_wrappers::WrappedArray;
1918
use abi_stable::StableAbi;
2019
use datafusion_common::DataFusionError;
2120
use datafusion_expr::interval_arithmetic::Interval;
2221

22+
use crate::arrow_wrappers::WrappedArray;
23+
2324
/// A stable struct for sharing [`Interval`] across FFI boundaries.
2425
/// See [`Interval`] for the meaning of each field. Scalar values
2526
/// are passed as Arrow arrays of length 1.

datafusion/ffi/src/insert_op.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use abi_stable::StableAbi;
19-
use datafusion::logical_expr::logical_plan::dml::InsertOp;
19+
use datafusion_expr::logical_plan::dml::InsertOp;
2020

2121
/// FFI safe version of [`InsertOp`].
2222
#[repr(C)]

datafusion/ffi/src/physical_expr/mod.rs

Lines changed: 31 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -18,42 +18,36 @@
1818
pub(crate) mod partitioning;
1919
pub(crate) mod sort;
2020

21-
use std::{
22-
any::Any,
23-
ffi::c_void,
24-
fmt::{Display, Formatter},
25-
hash::{DefaultHasher, Hash, Hasher},
26-
sync::Arc,
27-
};
28-
29-
use abi_stable::{
30-
StableAbi,
31-
std_types::{ROption, RResult, RString, RVec},
32-
};
33-
use arrow::{
34-
array::{ArrayRef, BooleanArray, RecordBatch},
35-
datatypes::SchemaRef,
36-
};
37-
use arrow_schema::{DataType, Field, FieldRef, Schema, ffi::FFI_ArrowSchema};
21+
use std::any::Any;
22+
use std::ffi::c_void;
23+
use std::fmt::{Display, Formatter};
24+
use std::hash::{DefaultHasher, Hash, Hasher};
25+
use std::sync::Arc;
26+
27+
use abi_stable::StableAbi;
28+
use abi_stable::std_types::{ROption, RResult, RString, RVec};
29+
use arrow::array::{ArrayRef, BooleanArray, RecordBatch};
30+
use arrow::datatypes::SchemaRef;
31+
use arrow_schema::ffi::FFI_ArrowSchema;
32+
use arrow_schema::{DataType, Field, FieldRef, Schema};
3833
use datafusion_common::{Result, ffi_datafusion_err};
39-
use datafusion_expr::{
40-
ColumnarValue, interval_arithmetic::Interval, sort_properties::ExprProperties,
41-
statistics::Distribution,
42-
};
34+
use datafusion_expr::ColumnarValue;
35+
use datafusion_expr::interval_arithmetic::Interval;
36+
use datafusion_expr::sort_properties::ExprProperties;
37+
use datafusion_expr::statistics::Distribution;
4338
use datafusion_physical_expr::PhysicalExpr;
4439
use datafusion_physical_expr_common::physical_expr::fmt_sql;
4540

46-
use crate::{
47-
arrow_wrappers::{WrappedArray, WrappedSchema},
48-
df_result,
49-
expr::{
50-
columnar_value::FFI_ColumnarValue, distribution::FFI_Distribution,
51-
expr_properties::FFI_ExprProperties, interval::FFI_Interval,
52-
},
53-
record_batch_stream::{record_batch_to_wrapped_array, wrapped_array_to_record_batch},
54-
rresult, rresult_return,
55-
util::FFIResult,
41+
use crate::arrow_wrappers::{WrappedArray, WrappedSchema};
42+
use crate::expr::columnar_value::FFI_ColumnarValue;
43+
use crate::expr::distribution::FFI_Distribution;
44+
use crate::expr::expr_properties::FFI_ExprProperties;
45+
use crate::expr::interval::FFI_Interval;
46+
use crate::record_batch_stream::{
47+
record_batch_to_wrapped_array, wrapped_array_to_record_batch,
5648
};
49+
use crate::util::FFIResult;
50+
use crate::{df_result, rresult, rresult_return};
5751

5852
#[repr(C)]
5953
#[derive(Debug, StableAbi)]
@@ -737,14 +731,14 @@ impl Display for ForeignPhysicalExpr {
737731

738732
#[cfg(test)]
739733
mod tests {
740-
use std::{
741-
hash::{DefaultHasher, Hash, Hasher},
742-
sync::Arc,
743-
};
734+
use std::hash::{DefaultHasher, Hash, Hasher};
735+
use std::sync::Arc;
744736

745737
use arrow::array::{BooleanArray, RecordBatch, record_batch};
746-
use datafusion_common::{DataFusionError, ScalarValue, tree_node::DynTreeNode};
747-
use datafusion_expr::{interval_arithmetic::Interval, statistics::Distribution};
738+
use datafusion_common::tree_node::DynTreeNode;
739+
use datafusion_common::{DataFusionError, ScalarValue};
740+
use datafusion_expr::interval_arithmetic::Interval;
741+
use datafusion_expr::statistics::Distribution;
748742
use datafusion_physical_expr::expressions::{Column, NegativeExpr, NotExpr};
749743
use datafusion_physical_expr_common::physical_expr::{PhysicalExpr, fmt_sql};
750744

datafusion/ffi/src/physical_expr/partitioning.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717

1818
use std::sync::Arc;
1919

20-
use abi_stable::{StableAbi, std_types::RVec};
20+
use abi_stable::StableAbi;
21+
use abi_stable::std_types::RVec;
2122
use datafusion_physical_expr::Partitioning;
2223
use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
2324

@@ -70,7 +71,8 @@ impl From<&FFI_Partitioning> for Partitioning {
7071

7172
#[cfg(test)]
7273
mod tests {
73-
use datafusion_physical_expr::{Partitioning, expressions::lit};
74+
use datafusion_physical_expr::Partitioning;
75+
use datafusion_physical_expr::expressions::lit;
7476

7577
use crate::physical_expr::partitioning::FFI_Partitioning;
7678

datafusion/ffi/src/physical_expr/sort.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,14 @@ impl From<&FFI_PhysicalSortExpr> for PhysicalSortExpr {
5555

5656
#[cfg(test)]
5757
mod tests {
58-
use crate::physical_expr::sort::FFI_PhysicalSortExpr;
58+
use std::sync::Arc;
59+
5960
use arrow_schema::SortOptions;
6061
use datafusion_physical_expr::expressions::Column;
6162
use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
6263
use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr;
63-
use std::sync::Arc;
64+
65+
use crate::physical_expr::sort::FFI_PhysicalSortExpr;
6466

6567
#[test]
6668
fn ffi_sort_expr_round_trip() {

0 commit comments

Comments
 (0)