Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions .cargo/config.toml

This file was deleted.

12 changes: 0 additions & 12 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,6 @@ jobs:
steps:
- uses: actions/checkout@v6

- name: Verify example datafusion version
run: |
MAIN_VERSION=$(grep -A 1 "name = \"datafusion-common\"" Cargo.lock | grep "version = " | head -1 | sed 's/.*version = "\(.*\)"/\1/')
EXAMPLE_VERSION=$(grep -A 1 "name = \"datafusion-common\"" examples/datafusion-ffi-example/Cargo.lock | grep "version = " | head -1 | sed 's/.*version = "\(.*\)"/\1/')
echo "Main crate datafusion version: $MAIN_VERSION"
echo "FFI example datafusion version: $EXAMPLE_VERSION"

if [ "$MAIN_VERSION" != "$EXAMPLE_VERSION" ]; then
echo "❌ Error: FFI example datafusion versions don't match!"
exit 1
fi

- name: Setup Python
uses: actions/setup-python@v6
with:
Expand Down
33 changes: 33 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 29 additions & 49 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
# specific language governing permissions and limitations
# under the License.

[package]
name = "datafusion-python"
[workspace.package]
version = "52.0.0"
homepage = "https://datafusion.apache.org/python"
repository = "https://github.com/apache/datafusion-python"
Expand All @@ -26,67 +25,43 @@ readme = "README.md"
license = "Apache-2.0"
edition = "2024"
rust-version = "1.88"
include = [
"/src",
"/datafusion",
"/LICENSE.txt",
"build.rs",
"pyproject.toml",
"Cargo.toml",
"Cargo.lock",
]

[features]
default = ["mimalloc"]
protoc = ["datafusion-substrait/protoc"]
substrait = ["dep:datafusion-substrait"]
[workspace]
members = ["crates/core", "crates/util", "examples/datafusion-ffi-example"]
resolver = "3"

[dependencies]
tokio = { version = "1.49", features = [
"macros",
"rt",
"rt-multi-thread",
"sync",
] }
pyo3 = { version = "0.28", features = [
"extension-module",
"abi3",
"abi3-py310",
] }
pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] }
[workspace.dependencies]
tokio = { version = "1.49" }
pyo3 = { version = "0.28" }
pyo3-async-runtimes = { version = "0.28" }
pyo3-log = "0.13.3"
arrow = { version = "58", features = ["pyarrow"] }
arrow = { version = "58" }
arrow-array = { version = "58" }
arrow-schema = { version = "58" }
arrow-select = { version = "58" }
datafusion = { version = "53", features = ["avro", "unicode_expressions"] }
datafusion-substrait = { version = "53", optional = true }
datafusion = { version = "53" }
datafusion-substrait = { version = "53" }
datafusion-proto = { version = "53" }
datafusion-ffi = { version = "53" }
prost = "0.14.3" # keep in line with `datafusion-substrait`
datafusion-catalog = { version = "53", default-features = false }
datafusion-common = { version = "53", default-features = false }
datafusion-functions-aggregate = { version = "53" }
datafusion-functions-window = { version = "53" }
datafusion-expr = { version = "53" }
prost = "0.14.3"
serde_json = "1"
uuid = { version = "1.21", features = ["v4"] }
mimalloc = { version = "0.1", optional = true, default-features = false, features = [
"local_dynamic_tls",
] }
uuid = { version = "1.21" }
mimalloc = { version = "0.1", default-features = false }
async-trait = "0.1.89"
futures = "0.3"
cstr = "0.2"
object_store = { version = "0.13.1", features = [
"aws",
"gcp",
"azure",
"http",
] }
object_store = { version = "0.13.1" }
url = "2"
log = "0.4.29"
parking_lot = "0.12"

[build-dependencies]
prost-types = "0.14.3" # keep in line with `datafusion-substrait`
prost-types = "0.14.3" # keep in line with `datafusion-substrait`
pyo3-build-config = "0.28"

[lib]
name = "datafusion_python"
crate-type = ["cdylib", "rlib"]
datafusion-python-util = { path = "crates/util" }

[profile.release]
lto = true
Expand All @@ -99,3 +74,8 @@ datafusion = { git = "https://github.com/apache/datafusion.git", rev = "35749607
datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
datafusion-proto = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
datafusion-ffi = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
datafusion-catalog = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
datafusion-functions-aggregate = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
datafusion-functions-window = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" }
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,15 @@ uv run --no-project maturin develop --uv
uv run --no-project pytest .
```

To run the FFI tests within the examples folder, after you have built
`datafusion-python` with the previous commands:

```bash
cd examples/datafusion-ffi-example
uv run --no-project maturin develop --uv
uv run --no-project pytest python/tests/_test_*py
```

### Running & Installing pre-commit hooks

`datafusion-python` takes advantage of [pre-commit](https://pre-commit.com/) to assist developers with code linting to help reduce
Expand Down
80 changes: 80 additions & 0 deletions crates/core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

[package]
name = "datafusion-python"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
Comment thread
timsaucer marked this conversation as resolved.
include = [
"src",
"datafusion",
Comment thread
timsaucer marked this conversation as resolved.
Outdated
"../LICENSE.txt",
"build.rs",
"../pyproject.toml",
"Cargo.toml",
"../Cargo.lock",
]

[dependencies]
tokio = { workspace = true, features = [
"macros",
"rt",
"rt-multi-thread",
"sync",
] }
pyo3 = { workspace = true, features = [
"extension-module",
"abi3",
"abi3-py310",
] }
pyo3-async-runtimes = { workspace = true, features = ["tokio-runtime"] }
pyo3-log = { workspace = true }
arrow = { workspace = true, features = ["pyarrow"] }
arrow-select = { workspace = true }
datafusion = { workspace = true, features = ["avro", "unicode_expressions"] }
datafusion-substrait = { workspace = true, optional = true }
datafusion-proto = { workspace = true }
datafusion-ffi = { workspace = true }
prost = { workspace = true } # keep in line with `datafusion-substrait`
serde_json = { workspace = true }
uuid = { workspace = true, features = ["v4"] }
mimalloc = { workspace = true, optional = true, features = [
"local_dynamic_tls",
] }
async-trait = { workspace = true }
futures = { workspace = true }
cstr = { workspace = true }
object_store = { workspace = true, features = ["aws", "gcp", "azure", "http"] }
url = { workspace = true }
log = { workspace = true }
parking_lot = { workspace = true }
datafusion-python-util = { workspace = true }

[build-dependencies]
prost-types = { workspace = true }
pyo3-build-config = { workspace = true }

[features]
default = ["mimalloc"]
protoc = ["datafusion-substrait/protoc"]
substrait = ["dep:datafusion-substrait"]

[lib]
name = "datafusion_python"
crate-type = ["cdylib", "rlib"]
File renamed without changes.
2 changes: 1 addition & 1 deletion src/array.rs → crates/core/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ use arrow::array::{Array, ArrayRef};
use arrow::datatypes::{Field, FieldRef};
use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
use arrow::pyarrow::ToPyArrow;
use datafusion_python_util::validate_pycapsule;
use pyo3::ffi::c_str;
use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods};
use pyo3::types::PyCapsule;
use pyo3::{Bound, PyAny, PyResult, Python, pyclass, pymethods};

use crate::errors::PyDataFusionResult;
use crate::utils::validate_pycapsule;

/// A Python object which implements the Arrow PyCapsule for importing
/// into other libraries.
Expand Down
20 changes: 16 additions & 4 deletions src/catalog.rs → crates/core/src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,20 @@ use datafusion::datasource::TableProvider;
use datafusion_ffi::catalog_provider::FFI_CatalogProvider;
use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec;
use datafusion_ffi::schema_provider::FFI_SchemaProvider;
use datafusion_python_util::{
create_logical_extension_capsule, ffi_logical_codec_from_pycapsule, validate_pycapsule,
wait_for_future,
};
use pyo3::IntoPyObjectExt;
use pyo3::exceptions::PyKeyError;
use pyo3::ffi::c_str;
use pyo3::prelude::*;
use pyo3::types::PyCapsule;

use crate::context::PySessionContext;
use crate::dataset::Dataset;
use crate::errors::{PyDataFusionError, PyDataFusionResult, py_datafusion_err, to_datafusion_err};
use crate::table::PyTable;
use crate::utils::{
create_logical_extension_capsule, extract_logical_extension_codec, validate_pycapsule,
wait_for_future,
};

#[pyclass(
from_py_object,
Expand Down Expand Up @@ -710,6 +711,17 @@ fn extract_schema_provider_from_pyobj(
Ok(provider)
}

fn extract_logical_extension_codec(
py: Python,
obj: Option<Bound<PyAny>>,
) -> PyResult<Arc<FFI_LogicalExtensionCodec>> {
let obj = match obj {
Some(obj) => obj,
None => PySessionContext::global_ctx()?.into_bound_py_any(py)?,
};
ffi_logical_codec_from_pycapsule(obj).map(Arc::new)
}

pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyCatalog>()?;
m.add_class::<PySchema>()?;
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
13 changes: 6 additions & 7 deletions src/context.rs → crates/core/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ use datafusion_ffi::catalog_provider_list::FFI_CatalogProviderList;
use datafusion_ffi::execution::FFI_TaskContextProvider;
use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec;
use datafusion_proto::logical_plan::DefaultLogicalExtensionCodec;
use datafusion_python_util::{
create_logical_extension_capsule, ffi_logical_codec_from_pycapsule, get_global_ctx,
get_tokio_runtime, spawn_future, validate_pycapsule, wait_for_future,
};
use object_store::ObjectStore;
use pyo3::IntoPyObjectExt;
use pyo3::exceptions::{PyKeyError, PyValueError};
Expand Down Expand Up @@ -82,10 +86,6 @@ use crate::udaf::PyAggregateUDF;
use crate::udf::PyScalarUDF;
use crate::udtf::PyTableFunction;
use crate::udwf::PyWindowUDF;
use crate::utils::{
create_logical_extension_capsule, extract_logical_extension_codec, get_global_ctx,
get_tokio_runtime, spawn_future, validate_pycapsule, wait_for_future,
};

/// Configuration options for a SessionContext
#[pyclass(
Expand Down Expand Up @@ -1187,8 +1187,7 @@ impl PySessionContext {
&self,
codec: Bound<'py, PyAny>,
) -> PyDataFusionResult<Self> {
let py = codec.py();
let logical_codec = extract_logical_extension_codec(py, Some(codec))?;
let logical_codec = Arc::new(ffi_logical_codec_from_pycapsule(codec)?);

Ok({
Self {
Expand Down Expand Up @@ -1246,7 +1245,7 @@ impl PySessionContext {

fn default_logical_codec(ctx: &Arc<SessionContext>) -> Arc<FFI_LogicalExtensionCodec> {
let codec = Arc::new(DefaultLogicalExtensionCodec {});
let runtime = get_tokio_runtime().0.handle().clone();
let runtime = get_tokio_runtime().handle().clone();
let ctx_provider = Arc::clone(ctx) as Arc<dyn TaskContextProvider>;
Arc::new(FFI_LogicalExtensionCodec::new(
codec,
Expand Down
Loading