@@ -563,3 +563,103 @@ ORDER BY start_timestamp, trace_id
563563LIMIT 1;
564564----
5655652024-10-01T00:00:00
566+
567+
# Make sure parquet filter pushdown is off before the fixtures below are built.
statement ok
SET datafusion.execution.parquet.pushdown_filters = false;
570+
# Regression test for https://github.com/apache/datafusion/issues/20696
# A multi-column INNER JOIN on dictionary-encoded columns failed
# when parquet filter pushdown was enabled.
574+
# Fixture: write the h2o parquet file (four rows, two distinct state/city pairs).
# `state` and `city` are cast to Dictionary(Int32, Utf8) before being written.
statement ok
COPY (
    SELECT
        to_timestamp_nanos(src.ts_ns) AS time,
        arrow_cast(src.state_txt, 'Dictionary(Int32, Utf8)') AS state,
        arrow_cast(src.city_txt, 'Dictionary(Int32, Utf8)') AS city,
        src.temp_val AS temp
    FROM (
        VALUES
            (200, 'CA', 'LA', 90.0),
            (250, 'MA', 'Boston', 72.4),
            (100, 'MA', 'Boston', 70.4),
            (350, 'CA', 'LA', 90.0)
    ) AS src(ts_ns, state_txt, city_txt, temp_val)
)
TO 'test_files/scratch/parquet_filter_pushdown/issue_20696/h2o/data.parquet';
591+
# Fixture: write the o2 parquet file (two MA/Boston rows, with an extra `reading` column).
# `state` and `city` are cast to Dictionary(Int32, Utf8) before being written.
statement ok
COPY (
    SELECT
        to_timestamp_nanos(src.ts_ns) AS time,
        arrow_cast(src.state_txt, 'Dictionary(Int32, Utf8)') AS state,
        arrow_cast(src.city_txt, 'Dictionary(Int32, Utf8)') AS city,
        src.temp_val AS temp,
        src.reading_val AS reading
    FROM (
        VALUES
            (250, 'MA', 'Boston', 53.4, 51.0),
            (100, 'MA', 'Boston', 50.4, 50.0)
    ) AS src(ts_ns, state_txt, city_txt, temp_val, reading_val)
)
TO 'test_files/scratch/parquet_filter_pushdown/issue_20696/o2/data.parquet';
607+
# Register the h2o scratch directory as an external parquet table.
statement ok
CREATE EXTERNAL TABLE h2o_parquet_20696
STORED AS PARQUET
LOCATION 'test_files/scratch/parquet_filter_pushdown/issue_20696/h2o/';

# Register the o2 scratch directory as an external parquet table.
statement ok
CREATE EXTERNAL TABLE o2_parquet_20696
STORED AS PARQUET
LOCATION 'test_files/scratch/parquet_filter_pushdown/issue_20696/o2/';
615+
# The query must return the same rows with filter pushdown disabled and enabled
# Case 1: parquet filter pushdown disabled.
statement ok
SET datafusion.execution.parquet.pushdown_filters = false;

# Only the two MA/Boston h2o rows (100ns and 250ns) fall inside the time range
# and have an o2 row matching on (time, state, city).
# `rowsort` makes the comparison independent of the order in which the join
# emits rows, without adding a sort to the query plan under test.
query RRR rowsort
SELECT
    h2o.temp AS h2o_temp,
    o2.temp AS o2_temp,
    o2.reading
FROM h2o_parquet_20696 AS h2o
INNER JOIN o2_parquet_20696 AS o2
    ON h2o.time = o2.time
    AND h2o.state = o2.state
    AND h2o.city = o2.city
WHERE h2o.time >= '1970-01-01T00:00:00.000000050Z'
    AND h2o.time <= '1970-01-01T00:00:00.000000300Z';
----
70.4 50.4 50
72.4 53.4 51
635+
636+
# Case 2: parquet filter pushdown enabled (the configuration named in issue 20696).
statement ok
SET datafusion.execution.parquet.pushdown_filters = true;

# Same query and same expected rows as the pushdown-disabled case.
# `rowsort` makes the comparison independent of the order in which the join
# emits rows, without adding a sort to the query plan under test.
query RRR rowsort
SELECT
    h2o.temp AS h2o_temp,
    o2.temp AS o2_temp,
    o2.reading
FROM h2o_parquet_20696 AS h2o
INNER JOIN o2_parquet_20696 AS o2
    ON h2o.time = o2.time
    AND h2o.state = o2.state
    AND h2o.city = o2.city
WHERE h2o.time >= '1970-01-01T00:00:00.000000050Z'
    AND h2o.time <= '1970-01-01T00:00:00.000000300Z';
----
70.4 50.4 50
72.4 53.4 51
655+
# Cleanup: drop both external tables registered for this regression test.
statement ok
DROP TABLE o2_parquet_20696;

statement ok
DROP TABLE h2o_parquet_20696;
662+
# Cleanup: turn parquet filter pushdown back off so later tests are unaffected.
statement ok
SET datafusion.execution.parquet.pushdown_filters = false;
0 commit comments