File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 2121import time
2222
2323import pyarrow as pa
24- from datafusion import SessionContext , col
25- from datafusion import functions as f
24+ from datafusion import SessionContext
2625
2726
2827def run (
@@ -47,36 +46,20 @@ def run(
4746 df = ctx .create_dataframe (partitions )
4847
4948 start = time .perf_counter ()
50- df .aggregate ([], [ f . sum ( col ( "a" ))]). collect ()
49+ df .collect ()
5150 duration = time .perf_counter () - start
52- print (f"{ n_batches } batches aggregated in { duration :.3f} s" )
51+ print (f"{ n_batches } batches collected in { duration :.3f} s" )
5352
5453
5554if __name__ == "__main__" :
5655 import argparse
5756
5857 parser = argparse .ArgumentParser ()
59- parser .add_argument (
60- "--batches" ,
61- type = int ,
62- default = 8 ,
63- help = "number of input batches to generate" ,
64- )
65- parser .add_argument (
66- "--batch-size" ,
67- type = int ,
68- default = 1_000_000 ,
69- help = "number of rows per batch" ,
70- )
7158 parser .add_argument (
7259 "--partitions" ,
7360 type = int ,
7461 default = None ,
7562 help = "number of partitions to create (defaults to one per batch)" ,
7663 )
7764 args = parser .parse_args ()
78- run (
79- n_batches = args .batches ,
80- batch_size = args .batch_size ,
81- n_partitions = args .partitions ,
82- )
65+ run (n_partitions = args .partitions )
You can’t perform that action at this time.
0 commit comments