Skip to content

Commit 3d3af30

Browse files
committed
feat: implement lazy loading for pyarrow to optimize imports
1 parent ce1cbcc commit 3d3af30

3 files changed

Lines changed: 40 additions & 4 deletions

File tree

python/datafusion/expr.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,18 @@
4040
if TYPE_CHECKING:
4141
from datafusion.plan import LogicalPlan
4242

43+
44+
class _LazyPyArrow:
    """Proxy that defers importing :mod:`pyarrow` until an attribute is read.

    Any attribute that is not found on the proxy itself is looked up on the
    real ``pyarrow`` module, which is imported inside ``__getattr__``.
    """

    def __getattr__(self, name: str) -> Any:  # pragma: no cover - optional dependency
        """Import ``pyarrow`` and return its attribute called ``name``.

        Args:
            name: Attribute requested on the proxy.

        Returns:
            The result of ``getattr(pyarrow, name)``.

        Raises:
            ImportError: If ``pyarrow`` cannot be imported.
            AttributeError: If ``pyarrow`` has no attribute ``name``.
        """
        import pyarrow  # type: ignore[import]

        return getattr(pyarrow, name)


if TYPE_CHECKING:
    # Static analysers need the real module: a proxy instance whose
    # ``__getattr__`` returns ``Any`` cannot be used in annotations such as
    # ``pa.DataType``.
    import pyarrow as pa
else:
    # At runtime keep the import deferred until ``pa`` is first used.
    pa = _LazyPyArrow()
54+
4355
# The following are imported from the internal representation. We may choose to
4456
# give these all proper wrappers, or to simply leave as is. These were added
4557
# in order to support passing the `test_imports` unit test.

python/datafusion/functions.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@
2020

2121
from typing import TYPE_CHECKING, Any, Optional
2222

23-
import pyarrow as pa
24-
2523
from datafusion._internal import functions as f
2624
from datafusion.common import NullTreatment
2725
from datafusion.expr import (
@@ -36,6 +34,18 @@
3634
if TYPE_CHECKING:
3735
from datafusion.context import SessionContext
3836

37+
38+
class _LazyPyArrow:
    """Proxy that defers importing :mod:`pyarrow` until an attribute is read.

    Any attribute that is not found on the proxy itself is looked up on the
    real ``pyarrow`` module, which is imported inside ``__getattr__``.
    """

    def __getattr__(self, name: str) -> Any:  # pragma: no cover - optional dependency
        """Import ``pyarrow`` and return its attribute called ``name``.

        Args:
            name: Attribute requested on the proxy.

        Returns:
            The result of ``getattr(pyarrow, name)``.

        Raises:
            ImportError: If ``pyarrow`` cannot be imported.
            AttributeError: If ``pyarrow`` has no attribute ``name``.
        """
        import pyarrow  # type: ignore[import]

        return getattr(pyarrow, name)


if TYPE_CHECKING:
    # Static analysers need the real module: a proxy instance whose
    # ``__getattr__`` returns ``Any`` cannot be used in annotations such as
    # ``pa.DataType``.
    import pyarrow as pa
else:
    # At runtime keep the import deferred until ``pa`` is first used.
    pa = _LazyPyArrow()
48+
3949
__all__ = [
4050
"abs",
4151
"acos",

python/datafusion/user_defined.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,27 @@
2424
from enum import Enum
2525
from typing import TYPE_CHECKING, Any, Callable, Optional, Protocol, TypeVar, overload
2626

27-
import pyarrow as pa
28-
2927
import datafusion._internal as df_internal
3028
from datafusion.expr import Expr
3129

3230
class _LazyPyArrow:
    """Proxy that defers importing :mod:`pyarrow` until an attribute is read.

    Any attribute that is not found on the proxy itself is looked up on the
    real ``pyarrow`` module, which is imported inside ``__getattr__``.
    """

    def __getattr__(self, name: str) -> Any:  # pragma: no cover - optional dependency
        """Import ``pyarrow`` and return its attribute called ``name``.

        Args:
            name: Attribute requested on the proxy.

        Returns:
            The result of ``getattr(pyarrow, name)``.

        Raises:
            ImportError: If ``pyarrow`` cannot be imported.
            AttributeError: If ``pyarrow`` has no attribute ``name``.
        """
        import pyarrow  # type: ignore[import]

        return getattr(pyarrow, name)


if TYPE_CHECKING:
    # Static analysers see the real module, so ``pa.DataType`` stays a valid
    # type and the TypeVar keeps its bound.
    import pyarrow as pa

    _R = TypeVar("_R", bound=pa.DataType)
else:
    # At runtime ``pa`` is the lazy proxy. The TypeVar is left unbound so that
    # defining it does not trigger the pyarrow import.
    pa = _LazyPyArrow()
    _R = TypeVar("_R")
3448

3549

3650
class Volatility(Enum):

0 commit comments

Comments
 (0)