Skip to content

Commit aed0d9c

Browse files
yujun777Copilot
andcommitted
[fix](fe) Fix isCountStar incorrectly treating count(null) as count(*)
### What problem does this PR solve? Issue Number: close #xxx Problem Summary: Count.isCountStar() returns true for count(null) because it only checks `child(0) instanceof Literal` without excluding NullLiteral. Since count(null) always returns 0 (it counts non-null values of a constant null), it is semantically different from count(*) which counts all rows. This could cause incorrect results if downstream rewrite rules (e.g. PushDownAggThroughJoin, SimplifyWindowExpression, PushCountIntoUnionAll) use isCountStar() before CountLiteralRewrite has a chance to rewrite count(null) to 0. ### Release note Fixed count(null) being incorrectly treated as count(*) in query optimizer, which could lead to wrong results in certain rewrite scenarios. ### Check List (For Author) - Test: Unit Test / Regression test - Behavior changed: No - Does this need documentation: No Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 4e81ace commit aed0d9c

File tree

5 files changed

+213
-1
lines changed

5 files changed

+213
-1
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ private Count(boolean isStar, AggregateFunctionParams functionParams) {
7777
public boolean isCountStar() {
7878
return isStar
7979
|| children.isEmpty()
80-
|| (children.size() == 1 && child(0) instanceof Literal);
80+
|| (children.size() == 1 && child(0) instanceof Literal && !child(0).isNullLiteral());
8181
}
8282

8383
@Override

fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/CountLiteralRewriteTest.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,14 @@
1818
package org.apache.doris.nereids.rules.rewrite;
1919

2020
import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
21+
import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral;
22+
import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
23+
import org.apache.doris.nereids.types.BigIntType;
2124
import org.apache.doris.nereids.util.MemoPatternMatchSupported;
2225
import org.apache.doris.nereids.util.PlanChecker;
2326
import org.apache.doris.utframe.TestWithFeService;
2427

28+
import org.junit.jupiter.api.Assertions;
2529
import org.junit.jupiter.api.Test;
2630

2731
/**
@@ -71,4 +75,32 @@ void testCountNull() {
7175
.matches(logicalAggregate().when(agg -> agg.getExpressions().stream().noneMatch(Count.class::isInstance)))
7276
.printlnTree();
7377
}
78+
79+
@Test
80+
void testCountNullIsNotCountStar() {
81+
// count(null) should NOT be treated as count(*)
82+
Count countNull = new Count(NullLiteral.INSTANCE);
83+
Assertions.assertFalse(countNull.isCountStar(),
84+
"count(null) should not be count star, because count(null) is always 0");
85+
86+
// typed null literal: count(CAST(null AS BIGINT)) should NOT be count(*)
87+
Count countTypedNull = new Count(new NullLiteral(BigIntType.INSTANCE));
88+
Assertions.assertFalse(countTypedNull.isCountStar(),
89+
"count(typed null) should not be count star");
90+
91+
// count(distinct null) should NOT be treated as count(*)
92+
Count countDistinctNull = new Count(true, NullLiteral.INSTANCE);
93+
Assertions.assertFalse(countDistinctNull.isCountStar(),
94+
"count(distinct null) should not be count star");
95+
96+
// count(1) should be treated as count(*)
97+
Count countOne = new Count(new BigIntLiteral(1));
98+
Assertions.assertTrue(countOne.isCountStar(),
99+
"count(1) should be count star");
100+
101+
// count(*) should be treated as count(*)
102+
Count countStar = new Count();
103+
Assertions.assertTrue(countStar.isCountStar(),
104+
"count(*) should be count star");
105+
}
74106
}

fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownCountThroughJoinTest.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.doris.nereids.trees.expressions.Multiply;
2424
import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
2525
import org.apache.doris.nereids.trees.expressions.functions.agg.Sum;
26+
import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
2627
import org.apache.doris.nereids.trees.plans.JoinType;
2728
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
2829
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
@@ -165,6 +166,31 @@ void testSingleCountStarEmptyGroupBy() {
165166
);
166167
}
167168

169+
@Test
170+
void testCountNullNotPushedDown() {
171+
// count(null) should NOT be treated as count(*) and should NOT be pushed down.
172+
// NullLiteral is neither isCountStar() nor instanceof Slot,
173+
// so the rule's predicate rejects the aggregate.
174+
Alias countNull = new Count(NullLiteral.INSTANCE).alias("countNull");
175+
LogicalPlan plan = new LogicalPlanBuilder(scan1)
176+
.join(scan2, JoinType.INNER_JOIN, Pair.of(0, 0))
177+
.aggGroupUsingIndex(ImmutableList.of(0),
178+
ImmutableList.of(scan1.getOutput().get(0), countNull))
179+
.build();
180+
181+
// Should NOT rewrite — aggregate stays above the original join.
182+
PlanChecker.from(MemoTestUtils.createConnectContext(), plan)
183+
.applyTopDown(new PushDownAggThroughJoin())
184+
.matches(
185+
logicalAggregate(
186+
logicalJoin(
187+
logicalOlapScan(),
188+
logicalOlapScan()
189+
)
190+
)
191+
);
192+
}
193+
168194
@Test
169195
void testBothSideCountAndCountStar() {
170196
Alias leftCnt = new Count(scan1.getOutput().get(0)).alias("leftCnt");
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !count_null --
3+
0
4+
5+
-- !count_star --
6+
4
7+
8+
-- !count_literal --
9+
4
10+
11+
-- !count_null_group_by --
12+
1 0
13+
2 0
14+
3 0
15+
4 0
16+
17+
-- !count_mixed --
18+
0 4 2 2
19+
20+
-- !count_null_union --
21+
0
22+
23+
-- !count_null_window --
24+
1 0
25+
2 0
26+
3 0
27+
4 0
28+
29+
-- !count_null_join_grouped --
30+
1 0 1
31+
2 0 1
32+
33+
-- !count_star_join --
34+
2 0
35+
36+
-- !count_null_index --
37+
0 2
38+
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
suite("count_null_not_count_star") {
19+
sql "SET enable_nereids_planner=true"
20+
sql "SET enable_fallback_to_original_planner=false"
21+
22+
sql "DROP TABLE IF EXISTS count_null_test"
23+
24+
sql """
25+
create table count_null_test(pk int, a int, b int) distributed by hash(pk) buckets 10
26+
properties('replication_num' = '1');
27+
"""
28+
29+
sql """
30+
insert into count_null_test values(1, 1, 1), (2, null, 2), (3, 3, null), (4, null, null);
31+
"""
32+
sql "sync"
33+
34+
// count(null) should always return 0 regardless of row count
35+
order_qt_count_null """select count(null) from count_null_test"""
36+
37+
// count(*) should return total number of rows
38+
order_qt_count_star """select count(*) from count_null_test"""
39+
40+
// count(1) should be equivalent to count(*)
41+
order_qt_count_literal """select count(1) from count_null_test"""
42+
43+
// count(null) with group by should return 0 for each group
44+
order_qt_count_null_group_by """select pk, count(null) from count_null_test group by pk order by pk"""
45+
46+
// mixed: count(null) vs count(*) vs count(column)
47+
order_qt_count_mixed """select count(null), count(*), count(a), count(b) from count_null_test"""
48+
49+
// count(null) in subquery with union all
50+
order_qt_count_null_union """
51+
select count(null) from (
52+
select a from count_null_test
53+
union all
54+
select a from count_null_test
55+
) t
56+
"""
57+
58+
// count(null) as window function should still return 0
59+
order_qt_count_null_window """
60+
select pk, count(null) over (order by pk) from count_null_test order by pk
61+
"""
62+
63+
// count(null) through join with GROUP BY: exercises PushDownAggThroughJoin path.
64+
// With the bug, count(null) would be treated as count(*) and pushed down through join,
65+
// producing wrong non-zero results. With the fix, it correctly returns 0.
66+
sql "DROP TABLE IF EXISTS count_null_test2"
67+
sql """
68+
create table count_null_test2(pk int, c int) distributed by hash(pk) buckets 10
69+
properties('replication_num' = '1');
70+
"""
71+
sql """insert into count_null_test2 values(1, 10), (2, 20);"""
72+
sql "sync"
73+
74+
order_qt_count_null_join_grouped """
75+
select t1.pk, count(null), count(*)
76+
from count_null_test t1 inner join count_null_test2 t2 on t1.pk = t2.pk
77+
group by t1.pk order by t1.pk
78+
"""
79+
80+
// count(null) vs count(*) through join without GROUP BY for comparison
81+
order_qt_count_star_join """
82+
select count(*), count(null) from count_null_test t1 inner join count_null_test2 t2 on t1.pk = t2.pk
83+
"""
84+
85+
// COUNT_ON_INDEX path: count(null) on a table with inverted index should NOT use COUNT_ON_MATCH
86+
sql "DROP TABLE IF EXISTS count_null_idx_test"
87+
sql """
88+
create table count_null_idx_test(
89+
pk int,
90+
content varchar(200),
91+
INDEX idx_content (content) USING INVERTED PROPERTIES("parser" = "english")
92+
) duplicate key(pk)
93+
distributed by hash(pk) buckets 1
94+
properties('replication_num' = '1');
95+
"""
96+
sql """insert into count_null_idx_test values(1, 'hello world'), (2, 'doris test'), (3, 'hello doris');"""
97+
sql "sync"
98+
99+
// count(*) with MATCH predicate may use COUNT_ON_MATCH optimization
100+
// count(null) with same predicate must NOT — verify via explain
101+
def explainCountStar = sql """
102+
explain shape plan select count(*) from count_null_idx_test where content match 'hello'
103+
"""
104+
def explainCountNull = sql """
105+
explain shape plan select count(null) from count_null_idx_test where content match 'hello'
106+
"""
107+
// count(null) plan must not contain COUNT_ON_MATCH
108+
def countNullPlan = explainCountNull.collect { it[0] }.join("\n")
109+
assertFalse(countNullPlan.contains("COUNT_ON_MATCH"),
110+
"count(null) should not use COUNT_ON_MATCH optimization, plan:\n${countNullPlan}")
111+
112+
// result correctness: count(null) should be 0 even with MATCH filter
113+
order_qt_count_null_index """
114+
select count(null), count(*) from count_null_idx_test where content match 'hello'
115+
"""
116+
}

0 commit comments

Comments
 (0)