Skip to content

Commit 5141ad9

Browse files
committed
checkout merged dataframe.py
1 parent: 61589e5 · commit: 5141ad9

1 file changed

Lines changed: 23 additions & 26 deletions

File tree

python/datafusion/dataframe.py

Lines changed: 23 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -22,16 +22,13 @@
2222
from __future__ import annotations
2323

2424
import warnings
25-
from collections.abc import Sequence
25+
from collections.abc import Iterable, Sequence
2626
from typing import (
2727
TYPE_CHECKING,
2828
Any,
2929
AsyncIterator,
30-
Iterable,
3130
Iterator,
3231
Literal,
33-
Optional,
34-
Union,
3532
overload,
3633
)
3734

@@ -59,7 +56,7 @@
5956

6057
if TYPE_CHECKING:
6158
import pathlib
62-
from typing import Callable
59+
from collections.abc import Callable
6360

6461
import pandas as pd
6562
import polars as pl
@@ -82,7 +79,7 @@ class Compression(Enum):
8279
LZ4 = "lz4"
8380
# lzo is not implemented yet
8481
# https://github.com/apache/arrow-rs/issues/6970
85-
# LZO = "lzo"
82+
# LZO = "lzo" # noqa: ERA001
8683
ZSTD = "zstd"
8784
LZ4_RAW = "lz4_raw"
8885

@@ -109,7 +106,7 @@ def from_str(cls: type[Compression], value: str) -> Compression:
109106
"""
110107
raise ValueError(error_msg) from err
111108

112-
def get_default_level(self) -> Optional[int]:
109+
def get_default_level(self) -> int | None:
113110
"""Get the default compression level for the compression type.
114111
115112
Returns:
@@ -142,24 +139,24 @@ def __init__(
142139
write_batch_size: int = 1024,
143140
writer_version: str = "1.0",
144141
skip_arrow_metadata: bool = False,
145-
compression: Optional[str] = "zstd(3)",
146-
compression_level: Optional[int] = None,
147-
dictionary_enabled: Optional[bool] = True,
142+
compression: str | None = "zstd(3)",
143+
compression_level: int | None = None,
144+
dictionary_enabled: bool | None = True,
148145
dictionary_page_size_limit: int = 1024 * 1024,
149-
statistics_enabled: Optional[str] = "page",
146+
statistics_enabled: str | None = "page",
150147
max_row_group_size: int = 1024 * 1024,
151148
created_by: str = "datafusion-python",
152-
column_index_truncate_length: Optional[int] = 64,
153-
statistics_truncate_length: Optional[int] = None,
149+
column_index_truncate_length: int | None = 64,
150+
statistics_truncate_length: int | None = None,
154151
data_page_row_count_limit: int = 20_000,
155-
encoding: Optional[str] = None,
152+
encoding: str | None = None,
156153
bloom_filter_on_write: bool = False,
157-
bloom_filter_fpp: Optional[float] = None,
158-
bloom_filter_ndv: Optional[int] = None,
154+
bloom_filter_fpp: float | None = None,
155+
bloom_filter_ndv: int | None = None,
159156
allow_single_file_parallelism: bool = True,
160157
maximum_parallel_row_group_writers: int = 1,
161158
maximum_buffered_record_batches_per_stream: int = 2,
162-
column_specific_options: Optional[dict[str, ParquetColumnOptions]] = None,
159+
column_specific_options: dict[str, ParquetColumnOptions] | None = None,
163160
) -> None:
164161
"""Initialize the ParquetWriterOptions.
165162
@@ -264,13 +261,13 @@ class ParquetColumnOptions:
264261

265262
def __init__(
266263
self,
267-
encoding: Optional[str] = None,
268-
dictionary_enabled: Optional[bool] = None,
269-
compression: Optional[str] = None,
270-
statistics_enabled: Optional[str] = None,
271-
bloom_filter_enabled: Optional[bool] = None,
272-
bloom_filter_fpp: Optional[float] = None,
273-
bloom_filter_ndv: Optional[int] = None,
264+
encoding: str | None = None,
265+
dictionary_enabled: bool | None = None,
266+
compression: str | None = None,
267+
statistics_enabled: str | None = None,
268+
bloom_filter_enabled: bool | None = None,
269+
bloom_filter_fpp: float | None = None,
270+
bloom_filter_ndv: int | None = None,
274271
) -> None:
275272
"""Initialize the ParquetColumnOptions.
276273
@@ -836,7 +833,7 @@ def join(
836833
# of a keyword argument.
837834
if (
838835
isinstance(on, tuple)
839-
and len(on) == 2
836+
and len(on) == 2 # noqa: PLR2004
840837
and isinstance(on[0], list)
841838
and isinstance(on[1], list)
842839
):
@@ -1068,7 +1065,7 @@ def write_parquet(
10681065
def write_parquet(
10691066
self,
10701067
path: str | pathlib.Path,
1071-
compression: Union[str, Compression, ParquetWriterOptions] = Compression.ZSTD,
1068+
compression: str | Compression | ParquetWriterOptions = Compression.ZSTD,
10721069
compression_level: int | None = None,
10731070
write_options: DataFrameWriteOptions | None = None,
10741071
) -> None:

0 commit comments

Comments (0)