From a746054456be40daae6b0111f49d9e5ec20ddf4d Mon Sep 17 00:00:00 2001 From: Pxl Date: Fri, 7 Feb 2025 18:57:04 +0800 Subject: [PATCH] [Bug](function) fix wrong result when percentile's second argument is 1 (#47586) ### What problem does this PR solve? Fix the wrong result returned when percentile's second argument is 1. The early return of `_nums.back()` for `quantile == 1` ran before `_nums` was sorted, so the last element was not necessarily the maximum; the check now runs after the sort. Related to https://github.com/apache/doris/pull/34382 ### Check List (For Author) - Test - [x] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [x] No. - [ ] Yes. - Does this need documentation? - [x] No. - [ ] Yes. ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --- be/src/util/counts.h | 8 +++++--- .../test_aggregate_percentile_no_cast.out | 9 +++++++++ .../test_aggregate_percentile_no_cast.groovy | 19 +++++++++++++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/be/src/util/counts.h b/be/src/util/counts.h index 9f45eb4d42633a..43b815cf8cabe0 100644 --- a/be/src/util/counts.h +++ b/be/src/util/counts.h @@ -87,13 +87,15 @@ class Counts { // get val in aggregate_function_percentile_approx.h return 0.0; } - if (quantile == 1 || _nums.size() == 1) { - return _nums.back(); - } + if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) { pdqsort(_nums.begin(), _nums.end()); } + if (quantile == 1 || _nums.size() == 1) { + return _nums.back(); + } + double u = (_nums.size() - 1) * quantile; auto index = static_cast(u); return _nums[index] + diff --git a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_percentile_no_cast.out b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_percentile_no_cast.out 
index 296589ed425c72..53cbce2c30bb85 100644 --- a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_percentile_no_cast.out +++ b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_percentile_no_cast.out @@ -37,3 +37,12 @@ -- !select -- [19.5, 24.25, 101.325] +-- !test -- +-128 127.0 126.93099999999995 -128.0 -127.9238 +-1 127.0 126.94250000000004 -1.0 -0.9995 +0 127.0 126.95400000000001 0.0 4.0E-4 +1 127.0 126.96549999999998 1.0 1.0003 +2 127.0 126.977 2.0 2.002 +12 127.0 126.9885 12.0 12.0115 +127 127.0 127.0 127.0 127.0 + diff --git a/regression-test/suites/nereids_p0/sql_functions/aggregate_functions/test_aggregate_percentile_no_cast.groovy b/regression-test/suites/nereids_p0/sql_functions/aggregate_functions/test_aggregate_percentile_no_cast.groovy index a7f382abe2175e..1a9a67ebbafd65 100644 --- a/regression-test/suites/nereids_p0/sql_functions/aggregate_functions/test_aggregate_percentile_no_cast.groovy +++ b/regression-test/suites/nereids_p0/sql_functions/aggregate_functions/test_aggregate_percentile_no_cast.groovy @@ -111,4 +111,23 @@ suite("test_aggregate_percentile_no_cast") { } sql "INSERT INTO percentile_test_db2 values(1,10.1), (2,8.2), (2,114.3) ,(3,10.4) ,(5,29.5) ,(6,101.6)" qt_select "select percentile_array(level,[0.5,0.55,0.805])from percentile_test_db2;" + + sql "DROP TABLE IF EXISTS TINYINTDATA_NOT_EMPTY_NOT_NULLABLE" + sql """ + CREATE TABLE TINYINTDATA_NOT_EMPTY_NOT_NULLABLE(id INT, data TINYINT NOT NULL) DISTRIBUTED BY HASH(id) BUCKETS 1 PROPERTIES ('replication_num' = '1'); + """ + sql "DROP TABLE IF EXISTS TEMPDATA" + sql """ + CREATE TABLE IF NOT EXISTS TEMPDATA(id INT, data INT) DISTRIBUTED BY HASH(id) BUCKETS 1 PROPERTIES ('replication_num' = '1'); + """ + sql """ + INSERT INTO TINYINTDATA_NOT_EMPTY_NOT_NULLABLE values (0, -1),(1, 0),(2, 1),(3, 2),(4, 12),(5, -128),(6, 127); + """ + sql """ + INSERT INTO TEMPDATA values(1, 1); + """ + qt_test """ + SELECT 
ARG0,PERCENTILE(NULLABLE(ARG0),1) OVER(ORDER BY t.ARG0 ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING ) as a,PERCENTILE(NULLABLE(ARG0),0.9999) OVER(ORDER BY t.ARG0 ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING ) as b,PERCENTILE(NULLABLE(ARG0),0) OVER(ORDER BY t.ARG0 ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING ) as c,PERCENTILE(NULLABLE(ARG0),0.0001) OVER(ORDER BY t.ARG0 ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING ) as d FROM (SELECT TEMPDATA . data, TABLE0.ARG0 FROM TEMPDATA CROSS JOIN (SELECT data AS ARG0 + FROM TINYINTDATA_NOT_EMPTY_NOT_NULLABLE ) AS TABLE0) t GROUP BY ARG0 order by ARG0; + """ }