@@ -353,20 +353,6 @@ impl ParquetAccessPlan {
353353 let row_selection = self . into_overall_row_selection ( row_group_meta_data) ?;
354354 PreparedAccessPlan :: new ( row_group_indexes, row_selection)
355355 }
356-
357- /// Like [`prepare`](Self::prepare), but also applies an
358- /// `AccessPlanOptimizer` to reorder/reverse row groups after
359- /// preparing the plan.
360- pub ( crate ) fn prepare_with_optimizer (
361- self ,
362- row_group_meta_data : & [ RowGroupMetaData ] ,
363- file_metadata : & ParquetMetaData ,
364- arrow_schema : & Schema ,
365- optimizer : & dyn crate :: access_plan_optimizer:: AccessPlanOptimizer ,
366- ) -> Result < PreparedAccessPlan > {
367- let plan = self . prepare ( row_group_meta_data) ?;
368- optimizer. optimize ( plan, file_metadata, arrow_schema)
369- }
370356}
371357
372358/// Represents a prepared, fully resolved [`ParquetAccessPlan`]
@@ -436,8 +422,6 @@ impl PreparedAccessPlan {
436422 }
437423 } ;
438424
439- let descending = first_sort_expr. options . descending ;
440-
441425 // Build statistics converter for this column
442426 let converter = match StatisticsConverter :: try_new (
443427 column. name ( ) ,
@@ -451,40 +435,31 @@ impl PreparedAccessPlan {
451435 }
452436 } ;
453437
454- // Get the relevant statistics for the selected row groups.
455- // For ASC sort: use min values — we want the RG with the smallest min
456- // to come first (best candidate for "smallest values").
457- // For DESC sort: use max values — we want the RG with the largest max
458- // to come first (best candidate for "largest values"). Using min for
459- // DESC can pick a worse first RG when ranges overlap (e.g., RG0 50-60
460- // vs RG1 40-100 — RG1 has larger values but smaller min).
438+ // Always sort by min values in ASC order to align row groups with
439+ // the file's declared output ordering. Direction (DESC) is handled
440+ // separately by ReverseRowGroups which is applied AFTER reorder.
441+ //
442+ // This composable design avoids the problem where reorder(DESC)
443+ // followed by reverse would double-flip the order, and ensures
444+ // that for already-sorted data, reorder is a no-op and reverse
445+ // gives the correct DESC order (including placing small tail RGs first).
461446 let rg_metadata: Vec < & RowGroupMetaData > = self
462447 . row_group_indexes
463448 . iter ( )
464449 . map ( |& idx| file_metadata. row_group ( idx) )
465450 . collect ( ) ;
466451
467- let stat_values = if descending {
468- match converter. row_group_maxes ( rg_metadata. iter ( ) . copied ( ) ) {
469- Ok ( vals) => vals,
470- Err ( e) => {
471- debug ! ( "Skipping RG reorder: cannot get max values: {e}" ) ;
472- return Ok ( self ) ;
473- }
474- }
475- } else {
476- match converter. row_group_mins ( rg_metadata. iter ( ) . copied ( ) ) {
477- Ok ( vals) => vals,
478- Err ( e) => {
479- debug ! ( "Skipping RG reorder: cannot get min values: {e}" ) ;
480- return Ok ( self ) ;
481- }
452+ let stat_values = match converter. row_group_mins ( rg_metadata. iter ( ) . copied ( ) ) {
453+ Ok ( vals) => vals,
454+ Err ( e) => {
455+ debug ! ( "Skipping RG reorder: cannot get min values: {e}" ) ;
456+ return Ok ( self ) ;
482457 }
483458 } ;
484459
485- // Sort indices by statistic values (min for ASC, max for DESC)
460+ // Always sort ASC by min values — direction is handled by reverse
486461 let sort_options = arrow:: compute:: SortOptions {
487- descending,
462+ descending : false ,
488463 nulls_first : first_sort_expr. options . nulls_first ,
489464 } ;
490465 let sorted_indices =
@@ -836,7 +811,11 @@ mod test {
836811 }
837812
838813 #[ test]
839- fn test_reorder_by_statistics_desc ( ) {
814+ fn test_reorder_by_statistics_desc_sorts_asc ( ) {
815+ // reorder_by_statistics always sorts by min ASC regardless of sort
816+ // direction. DESC is handled separately by ReverseRowGroups which
817+ // is applied after reorder in the optimizer pipeline.
818+ //
840819 // RGs: [50-99, 200-299, 1-30]
841820 let metadata = make_metadata_with_stats ( & [ ( 50 , 99 ) , ( 200 , 299 ) , ( 1 , 30 ) ] ) ;
842821 let schema = make_arrow_schema ( ) ;
@@ -847,8 +826,9 @@ mod test {
847826 . reorder_by_statistics ( & sort_order, & metadata, & schema)
848827 . unwrap ( ) ;
849828
850- // DESC: largest max first: RG1(max=299), RG0(max=99), RG2(max=30)
851- assert_eq ! ( plan. row_group_indexes, vec![ 1 , 0 , 2 ] ) ;
829+ // Always ASC by min: RG2(min=1), RG0(min=50), RG1(min=200)
830+ // Reverse is applied separately for DESC queries.
831+ assert_eq ! ( plan. row_group_indexes, vec![ 2 , 0 , 1 ] ) ;
852832 }
853833
854834 #[ test]
@@ -949,17 +929,14 @@ mod test {
949929 }
950930
951931 #[ test]
952- fn test_reorder_by_statistics_desc_uses_max_for_overlapping_rgs ( ) {
953- // Overlapping ranges where min DESC would pick worse RG than max DESC:
954- // RG0: 50-60 (small range, moderate max)
955- // RG1: 40-100 (wide range, high max but lower min)
956- // RG2: 20-30 (low max)
957- //
958- // For ORDER BY DESC LIMIT N:
959- // Using min DESC: [RG0(50), RG1(40), RG2(20)] → reads RG0 first (max=60 only)
960- // Using max DESC: [RG1(100), RG0(60), RG2(30)] → reads RG1 first (max=100)
932+ fn test_reorder_by_statistics_overlapping_rgs_sorts_asc ( ) {
933+ // Overlapping ranges — reorder always uses min ASC:
934+ // RG0: 50-60
935+ // RG1: 40-100 (lower min, wider range)
936+ // RG2: 20-30 (lowest min)
961937 //
962- // RG1 is the better first choice for DESC because it contains the largest values.
938+ // Sorted by min ASC: [RG2(20), RG1(40), RG0(50)]
939+ // For DESC queries, ReverseRowGroups is applied after to flip order.
963940 let metadata = make_metadata_with_stats ( & [ ( 50 , 60 ) , ( 40 , 100 ) , ( 20 , 30 ) ] ) ;
964941 let schema = make_arrow_schema ( ) ;
965942 let sort_order = make_sort_order_desc ( ) ;
@@ -969,8 +946,8 @@ mod test {
969946 . reorder_by_statistics ( & sort_order, & metadata, & schema)
970947 . unwrap ( ) ;
971948
972- // Expected: RG1 (max=100) first, then RG0 (max=60 ), then RG2 (max=30 )
973- assert_eq ! ( plan. row_group_indexes, vec![ 1 , 0 , 2 ] ) ;
949+ // Always ASC by min: RG2(min=20), RG1(min=40 ), RG0(min=50 )
950+ assert_eq ! ( plan. row_group_indexes, vec![ 2 , 1 , 0 ] ) ;
974951 }
975952
976953 #[ test]
0 commit comments