|
34 | 34 | column, |
35 | 35 | literal, |
36 | 36 | ) |
| 37 | +from datafusion import ( |
| 38 | + col as df_col, |
| 39 | +) |
37 | 40 | from datafusion import ( |
38 | 41 | functions as f, |
39 | 42 | ) |
@@ -2753,3 +2756,34 @@ def test_show_from_empty_batch(capsys) -> None: |
2753 | 2756 | ctx.create_dataframe([[batch]]).show() |
2754 | 2757 | out = capsys.readouterr().out |
2755 | 2758 | assert "| a |" in out |
| 2759 | + |
| 2760 | + |
@pytest.mark.parametrize("file_sort_order", [[["a"]], [[df_col("a")]]])
def test_register_parquet_file_sort_order(ctx, tmp_path, file_sort_order) -> None:
    """Register a Parquet file with ``file_sort_order`` and check it is listed.

    Parametrized over two spellings of the sort order for column ``a``: a
    nested list holding the column name as a string, and a nested list
    holding the expression ``df_col("a")``.
    """
    # Import the submodule explicitly: ``import pyarrow`` on its own does not
    # guarantee that ``pa.parquet`` is reachable as an attribute.
    import pyarrow.parquet as pq

    path = tmp_path / "file.parquet"
    pq.write_table(pa.table({"a": [1, 2]}), path)

    ctx.register_parquet("t", path, file_sort_order=file_sort_order)

    # Registration succeeded if the table name shows up in the default schema.
    assert "t" in ctx.catalog().schema().names()
| 2768 | + |
| 2769 | + |
@pytest.mark.parametrize("file_sort_order", [[["a"]], [[df_col("a")]]])
def test_register_listing_table_file_sort_order(
    ctx, tmp_path, file_sort_order
) -> None:
    """Register a listing table with ``file_sort_order`` and check it is listed.

    A directory containing a single Parquet file is registered under the name
    ``t`` with an explicit schema. Parametrized over two spellings of the sort
    order for column ``a``: the column name as a string and the expression
    ``df_col("a")``, each wrapped in a nested list.
    """
    # Import the submodule explicitly: ``import pyarrow`` on its own does not
    # guarantee that ``pa.parquet`` is reachable as an attribute.
    import pyarrow.parquet as pq

    table = pa.table({"a": [1, 2]})
    dir_path = tmp_path / "dir"
    dir_path.mkdir()
    pq.write_table(table, dir_path / "file.parquet")

    ctx.register_listing_table(
        "t", dir_path, schema=table.schema, file_sort_order=file_sort_order
    )

    # Registration succeeded if the table name shows up in the default schema.
    assert "t" in ctx.catalog().schema().names()
| 2780 | + |
| 2781 | + |
@pytest.mark.parametrize("file_sort_order", [[["a"]], [[df_col("a")]]])
def test_read_parquet_file_sort_order(tmp_path, file_sort_order) -> None:
    """Read a Parquet file with ``file_sort_order`` and check the data round-trips.

    Parametrized over two spellings of the sort order for column ``a``: the
    column name as a string and the expression ``df_col("a")``, each wrapped
    in a nested list.
    """
    # Import the submodule explicitly: ``import pyarrow`` on its own does not
    # guarantee that ``pa.parquet`` is reachable as an attribute.
    import pyarrow.parquet as pq

    ctx = SessionContext()
    path = tmp_path / "data.parquet"
    pq.write_table(pa.table({"a": [1, 2]}), path)

    df = ctx.read_parquet(path, file_sort_order=file_sort_order)

    # The first column of the first collected batch must hold the written values.
    assert df.collect()[0].column(0).to_pylist() == [1, 2]
0 commit comments