Skip to content

Commit b1f3ac1

Browse files
committed
refactor: simplify table provider conversions by introducing utility functions
1 parent 63f36bf commit b1f3ac1

3 files changed

Lines changed: 45 additions & 24 deletions

File tree

src/catalog.rs

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
use crate::dataset::Dataset;
1919
use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionError, PyDataFusionResult};
2020
use crate::table::PyTableProvider;
21-
use crate::utils::{table_provider_from_pycapsule, validate_pycapsule, wait_for_future};
21+
use crate::utils::{
22+
table_provider_from_pycapsule, table_provider_send_to_table_provider, table_provider_to_send,
23+
validate_pycapsule, wait_for_future,
24+
};
2225
use async_trait::async_trait;
2326
use datafusion::catalog::{MemoryCatalogProvider, MemorySchemaProvider};
2427
use datafusion::common::DataFusionError;
@@ -354,13 +357,7 @@ impl SchemaProvider for RustWrappedPySchemaProvider {
354357
) -> datafusion::common::Result<Option<Arc<dyn TableProvider>>, DataFusionError> {
355358
// Convert from our internal Send type to the trait expected type
356359
match self.table_inner(name).map_err(to_datafusion_err)? {
357-
Some(table) => {
358-
// Safe conversion: we're widening the bounds (removing Send)
359-
let raw = Arc::into_raw(table);
360-
let wide: *const dyn TableProvider = raw as *const _;
361-
let arc = unsafe { Arc::from_raw(wide) };
362-
Ok(Some(arc))
363-
}
360+
Some(table) => Ok(Some(table_provider_send_to_table_provider(table))),
364361
None => Ok(None),
365362
}
366363
}
@@ -371,11 +368,7 @@ impl SchemaProvider for RustWrappedPySchemaProvider {
371368
table: Arc<dyn TableProvider>,
372369
) -> datafusion::common::Result<Option<Arc<dyn TableProvider>>> {
373370
// Convert from trait type to our internal Send type
374-
let send_table = {
375-
let raw = Arc::into_raw(table);
376-
let send: *const (dyn TableProvider + Send) = raw as *const _;
377-
unsafe { Arc::from_raw(send) }
378-
};
371+
let send_table = table_provider_to_send(table);
379372

380373
let py_table = PyTable::new(send_table);
381374
Python::with_gil(|py| {
@@ -407,12 +400,8 @@ impl SchemaProvider for RustWrappedPySchemaProvider {
407400
// Otherwise, return None.
408401
let dataset = match Dataset::new(&table, py) {
409402
Ok(dataset) => {
410-
// Convert from our internal Send type to trait expected type
411403
let send_table = Arc::new(dataset) as Arc<dyn TableProvider + Send>;
412-
let raw = Arc::into_raw(send_table);
413-
let wide: *const dyn TableProvider = raw as *const _;
414-
let arc = unsafe { Arc::from_raw(wide) };
415-
Some(arc)
404+
Some(table_provider_send_to_table_provider(send_table))
416405
}
417406
Err(_) => None,
418407
};

src/udtf.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ use std::sync::Arc;
2121
use crate::errors::{py_datafusion_err, to_datafusion_err};
2222
use crate::expr::PyExpr;
2323
use crate::table::PyTableProvider;
24-
use crate::utils::{table_provider_from_pycapsule, validate_pycapsule};
24+
use crate::utils::{
25+
table_provider_from_pycapsule, table_provider_send_to_table_provider, validate_pycapsule,
26+
};
2527
use datafusion::catalog::{TableFunctionImpl, TableProvider};
2628
use datafusion::error::Result as DataFusionResult;
2729
use datafusion::logical_expr::Expr;
@@ -113,11 +115,7 @@ impl TableFunctionImpl for PyTableFunction {
113115
PyTableFunctionInner::FFIFunction(func) => func.call(args),
114116
PyTableFunctionInner::PythonFunction(obj) => {
115117
let send_result = call_python_table_function(obj, args)?;
116-
// Convert from our Send type to the trait expected type
117-
let raw = Arc::into_raw(send_result);
118-
let wide: *const dyn TableProvider = raw as *const _;
119-
let arc = unsafe { Arc::from_raw(wide) };
120-
Ok(arc)
118+
Ok(table_provider_send_to_table_provider(send_result))
121119
}
122120
}
123121
}

src/utils.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,40 @@ pub(crate) fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyRe
122122
Ok(())
123123
}
124124

125+
/// Convert a [`TableProvider`] wrapped in an [`Arc`] with a `Send` auto trait into one
126+
/// without the marker.
127+
///
128+
/// # Safety
129+
///
130+
/// Removing `Send` from a trait object only relaxes the bounds. The underlying vtable is
131+
/// unchanged, so it is safe to reuse the pointer produced by [`Arc::into_raw`].
132+
pub(crate) fn table_provider_send_to_table_provider(
133+
table: Arc<dyn TableProvider + Send>,
134+
) -> Arc<dyn TableProvider> {
135+
let raw: *const (dyn TableProvider + Send) = Arc::into_raw(table);
136+
// SAFETY: `Send` is an auto trait with no associated data, so the trait object layout
137+
// is identical and the pointer may be reinterpreted without changing the reference
138+
// count.
139+
unsafe { Arc::from_raw(raw as *const dyn TableProvider) }
140+
}
141+
142+
/// Convert a [`TableProvider`] wrapped in an [`Arc`] into one that also carries the `Send`
143+
/// auto trait.
144+
///
145+
/// # Safety
146+
///
147+
/// DataFusion's `TableProvider` trait requires `Send`, so the underlying provider implements
148+
/// the marker. This allows us to reinterpret the pointer as a `TableProvider + Send` trait
149+
/// object.
150+
pub(crate) fn table_provider_to_send(
151+
table: Arc<dyn TableProvider>,
152+
) -> Arc<dyn TableProvider + Send> {
153+
let raw: *const dyn TableProvider = Arc::into_raw(table);
154+
// SAFETY: The underlying type implements `Send`, so the pointer can be safely treated as
155+
// a `TableProvider + Send` trait object.
156+
unsafe { Arc::from_raw(raw as *const (dyn TableProvider + Send)) }
157+
}
158+
125159
pub(crate) fn table_provider_from_pycapsule(
126160
obj: &Bound<PyAny>,
127161
) -> PyResult<Option<Arc<dyn TableProvider + Send>>> {

0 commit comments

Comments
 (0)