diff --git a/dbcon/execplan/treenode.h b/dbcon/execplan/treenode.h index 54a9f084b5..48590c3a1d 100644 --- a/dbcon/execplan/treenode.h +++ b/dbcon/execplan/treenode.h @@ -394,6 +394,15 @@ class TreeNode // double's range is +/-1.7E308 with at least 15 digits of precision char tmp[312]; // for conversion use + // Helpers to safely convert string values to numeric without asserting on NULL + inline int atoiFromStrVal() const { return atoi(fResult.strVal.safeString("").c_str()); } + inline float atofFromStrVal() const { return atof(fResult.strVal.safeString("").c_str()); } + inline double strtodFromStrVal() const { return strtod(fResult.strVal.safeString("").c_str(), nullptr); } + inline long double strtoldFromStrVal() const + { + return strtold(fResult.strVal.safeString("").c_str(), nullptr); + } + // @bug5635 If any item involved in this filter belongs to a derived table, // the derived table alias is added to the reference vector. std::string fDerivedTable; @@ -413,15 +422,13 @@ inline bool TreeNode::getBoolVal() if (fResultType.colWidth <= 8) return (atoi((char*)(&fResult.origIntVal)) != 0); - idbassert(fResult.strVal.str()); - return (atoi(fResult.strVal.str()) != 0); + return (atoiFromStrVal() != 0); case CalpontSystemCatalog::VARCHAR: if (fResultType.colWidth <= 7) return (atoi((char*)(&fResult.origIntVal)) != 0); - idbassert(fResult.strVal.str()); - return (atoi(fResult.strVal.str()) != 0); + return (atoiFromStrVal() != 0); // FIXME: Huh??? case CalpontSystemCatalog::VARBINARY: @@ -430,8 +437,7 @@ inline bool TreeNode::getBoolVal() if (fResultType.colWidth <= 7) return (atoi((char*)(&fResult.origIntVal)) != 0); - idbassert(fResult.strVal.str()); - return (atoi(fResult.strVal.str()) != 0); + return (atoiFromStrVal() != 0); case CalpontSystemCatalog::BIGINT: case CalpontSystemCatalog::SMALLINT: @@ -799,15 +805,13 @@ inline float TreeNode::getFloatVal() if (fResultType.colWidth <= 8) return atof((char*)(&fResult.origIntVal)); - idbassert(fResult.strVal.str()); - return atof(fResult.strVal.str()); + return atofFromStrVal(); case CalpontSystemCatalog::VARCHAR: if (fResultType.colWidth <= 7) return atof((char*)(&fResult.origIntVal)); - idbassert(fResult.strVal.str()); - return atof(fResult.strVal.str()); + return atofFromStrVal(); // FIXME: ??? case CalpontSystemCatalog::VARBINARY: @@ -816,8 +820,7 @@ inline float TreeNode::getFloatVal() if (fResultType.colWidth <= 7) return atof((char*)(&fResult.origIntVal)); - idbassert(fResult.strVal.str()); - return atof(fResult.strVal.str()); + return atofFromStrVal(); case CalpontSystemCatalog::BIGINT: case CalpontSystemCatalog::TINYINT: @@ -870,15 +873,13 @@ inline double TreeNode::getDoubleVal() if (fResultType.colWidth <= 8) return strtod((char*)(&fResult.origIntVal), nullptr); - idbassert(fResult.strVal.str()); - return strtod(fResult.strVal.str(), nullptr); + return strtodFromStrVal(); case CalpontSystemCatalog::VARCHAR: if (fResultType.colWidth <= 7) return strtod((char*)(&fResult.origIntVal), nullptr); - idbassert(fResult.strVal.str()); - return strtod(fResult.strVal.str(), nullptr); + return strtodFromStrVal(); // FIXME: ??? case CalpontSystemCatalog::VARBINARY: @@ -888,7 +889,7 @@ inline double TreeNode::getDoubleVal() return strtod((char*)(&fResult.origIntVal), nullptr); // idbassert(fResult.strVal.str()); - return strtod(fResult.strVal.safeString("").c_str(), nullptr); + return strtodFromStrVal(); case CalpontSystemCatalog::BIGINT: case CalpontSystemCatalog::TINYINT: @@ -941,15 +942,13 @@ inline long double TreeNode::getLongDoubleVal() if (fResultType.colWidth <= 8) return strtold((char*)(&fResult.origIntVal), nullptr); - idbassert(fResult.strVal.str()); - return strtold(fResult.strVal.str(), nullptr); + return strtoldFromStrVal(); case CalpontSystemCatalog::VARCHAR: if (fResultType.colWidth <= 7) return strtold((char*)(&fResult.origIntVal), nullptr); - idbassert(fResult.strVal.str()); - return strtold(fResult.strVal.str(), nullptr); + return strtoldFromStrVal(); // FIXME: ??? case CalpontSystemCatalog::VARBINARY: @@ -958,8 +957,7 @@ inline long double TreeNode::getLongDoubleVal() if (fResultType.colWidth <= 7) return strtold((char*)(&fResult.origIntVal), nullptr); - idbassert(fResult.strVal.str()); - return strtold(fResult.strVal.str(), nullptr); + return strtoldFromStrVal(); case CalpontSystemCatalog::BIGINT: case CalpontSystemCatalog::TINYINT: diff --git a/dbcon/joblist/jlf_execplantojoblist.cpp b/dbcon/joblist/jlf_execplantojoblist.cpp index 91d8d8afca..ebebc72f81 100644 --- a/dbcon/joblist/jlf_execplantojoblist.cpp +++ b/dbcon/joblist/jlf_execplantojoblist.cpp @@ -3251,7 +3251,65 @@ void doOR(ParseTree* n, JobInfo& jobInfo, bool tryCombine) if (jsv.empty()) { - // OR is processed as an expression + // Try to extract equi-join edges hidden inside OR before degrading to expression + // Traverse the subtree and collect SimpleFilter nodes that are '=' between two SimpleColumns from different tables + std::set> addedPairs; + JobStepVector joinStepsFromOr; + + std::stack st; + st.push(n); + while (!st.empty()) + { + ParseTree* cur = st.top(); + st.pop(); + if (!cur) + continue; + + // Always traverse children + if (cur->right()) st.push(cur->right()); + if (cur->left()) st.push(cur->left()); + + SimpleFilter* sf = dynamic_cast(cur->data()); + if (!sf) + continue; + + // Only '=' + const Operator* op = sf->op().get(); + if (!op || !(*op == opeq)) + continue; + + // Both sides must be simple columns and belong to different tables/views/aliases + SimpleColumn* sc1 = dynamic_cast(sf->lhs()); + SimpleColumn* sc2 = dynamic_cast(sf->rhs()); + if (!sc1 || !sc2) + continue; + + if (sc1->tableName() == sc2->tableName() && sc1->tableAlias() == sc2->tableAlias() && + sc1->viewName() == sc2->viewName()) + continue; // same table: not a join edge + + // Build a stable pair key to avoid duplicates + uint32_t tid1 = getTableKey(jobInfo, tableOid(sc1, jobInfo.csc), sc1->tableAlias(), sc1->schemaName(), + sc1->viewName(), sc1->partitions()); + uint32_t tid2 = getTableKey(jobInfo, tableOid(sc2, jobInfo.csc), sc2->tableAlias(), sc2->schemaName(), + sc2->viewName(), sc2->partitions()); + std::pair edge = (tid1 < tid2) ? std::make_pair(tid1, tid2) + : std::make_pair(tid2, tid1); + + if (addedPairs.insert(edge).second) + { + // Create a join step for this equality to populate the join graph + SOP sop(new PredicateOperator("=")); + JobStepVector oneJoin = doJoin(sc1, sc2, jobInfo, sop, sf); + joinStepsFromOr.insert(joinStepsFromOr.end(), oneJoin.begin(), oneJoin.end()); + } + } + + // First, add any extracted join steps (if any), then add the expression to preserve semantics + if (!joinStepsFromOr.empty()) + JLF_ExecPlanToJobList::addJobSteps(joinStepsFromOr, jobInfo, false); + + // OR is processed as an expression for actual filtering semantics jsv = doExpressionFilter(n, jobInfo); } diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 4f7c117c70..b5535f360d 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -1284,6 +1284,67 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) thd_set_ha_data(current_thd, mcs_hton, get_fe_conn_info_ptr()); } + // Handle NULL-safe equality '<=>' by rewriting into + // (lhs = rhs) OR (isnull(lhs) AND isnull(rhs)) + if (std::string(ifp->func_name()) == "<=>" && ifp->argument_count() == 2) + { + // Use operands from rcWorkStack as in standard relational ops + idbassert(gwip->rcWorkStack.size() >= 2); + ReturnedColumn* rhs = gwip->rcWorkStack.top(); + gwip->rcWorkStack.pop(); + ReturnedColumn* lhs = gwip->rcWorkStack.top(); + gwip->rcWorkStack.pop(); + + // Build equality via buildEqualityPredicate() so it is registered as an equi-join edge + std::vector itemList; + for (uint32_t i = 0; i < ifp->argument_count(); i++) + itemList.push_back(ifp->arguments()[i]); + + boost::shared_ptr op_eq(new PredicateOperator("=")); + if (!buildEqualityPredicate(lhs->clone(), rhs->clone(), gwip, op_eq, + Item_func::EQ_FUNC, itemList, /*isInSubs=*/false)) + { + return false; + } + // equality is now on ptWorkStack + idbassert(!gwip->ptWorkStack.empty()); + ParseTree* eq_pt = gwip->ptWorkStack.top(); + gwip->ptWorkStack.pop(); + + // isnull(lhs) + SimpleFilter* sf_isnull_lhs = new SimpleFilter(); + sf_isnull_lhs->timeZone(gwip->timeZone); + boost::shared_ptr op_isnull_lhs(new PredicateOperator("isnull")); + op_isnull_lhs->operationType(lhs->resultType()); + sf_isnull_lhs->op(op_isnull_lhs); + // yes, these are backwards in ConstPredicate, but here we need lhs value as first arg + // We implement isnull by putting tested expr on lhs and a NULL placeholder on rhs + sf_isnull_lhs->lhs(lhs->clone()); + sf_isnull_lhs->rhs(new ConstantColumn("", ConstantColumn::NULLDATA)); + + // isnull(rhs) + SimpleFilter* sf_isnull_rhs = new SimpleFilter(); + sf_isnull_rhs->timeZone(gwip->timeZone); + boost::shared_ptr op_isnull_rhs(new PredicateOperator("isnull")); + op_isnull_rhs->operationType(rhs->resultType()); + sf_isnull_rhs->op(op_isnull_rhs); + sf_isnull_rhs->lhs(rhs->clone()); + sf_isnull_rhs->rhs(new ConstantColumn("", ConstantColumn::NULLDATA)); + + // AND(isnull(lhs), isnull(rhs)) + ParseTree* pt_and = new ParseTree(new LogicOperator("and")); + pt_and->left(new ParseTree(sf_isnull_lhs)); + pt_and->right(new ParseTree(sf_isnull_rhs)); + + // OR( lhs = rhs, AND(...) ) + ParseTree* pt_or = new ParseTree(new LogicOperator("or")); + pt_or->left(eq_pt); + pt_or->right(pt_and); + + gwip->ptWorkStack.push(pt_or); + return true; + } + if (ifp->functype() == Item_func::BETWEEN) { idbassert(gwip->rcWorkStack.size() >= 3); diff --git a/mysql-test/columnstore/bugfixes/mcol-5675.result b/mysql-test/columnstore/bugfixes/mcol-5675.result new file mode 100644 index 0000000000..211cc807b2 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5675.result @@ -0,0 +1,64 @@ +DROP DATABASE IF EXISTS mcol_5675; +CREATE DATABASE mcol_5675; +USE mcol_5675; +CREATE TABLE t_varchar ( +v VARCHAR(10) NULL +) ENGINE=COLUMNSTORE; +INSERT INTO t_varchar (v) VALUES +(NULL), +(''), +('0'), +('1'), +('123'), +('-5'), +('3.14'), +('abc'); +SELECT v IS NULL AS is_null, IF(v, 1, 0) AS bool_from_v +FROM t_varchar +ORDER BY v IS NULL, v; +is_null bool_from_v +0 0 +0 1 +0 0 +0 1 +0 1 +0 1 +0 0 +1 0 +SELECT v, v + 0 AS plus0_int, v + 0.0 AS plus0_float +FROM t_varchar +ORDER BY v IS NULL, v; +v plus0_int plus0_float + 0 0 +-5 -5 -5 +0 0 0 +1 1 1 +123 123 123 +3.14 3.14 3.14 +abc 0 0 +NULL NULL NULL +SELECT v, ROUND(v + 0.0, 0) AS rounded +FROM t_varchar +ORDER BY v IS NULL, v; +v rounded + 0 +-5 -5 +0 0 +1 1 +123 123 +3.14 3 +abc 0 +NULL NULL +SELECT v FROM t_varchar WHERE v <=> '0' ORDER BY v; +v +0 +SELECT v FROM t_varchar WHERE v <=> 123 ORDER BY v; +v +123 +SELECT v FROM t_varchar WHERE v <=> '' ORDER BY v; +v + +SELECT v FROM t_varchar WHERE v <=> NULL ORDER BY v IS NULL, v; +v +NULL +DROP DATABASE mcol_5675; diff --git a/mysql-test/columnstore/bugfixes/mcol-5675.test b/mysql-test/columnstore/bugfixes/mcol-5675.test new file mode 100644 index 0000000000..1b881b23ae --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5675.test @@ -0,0 +1,47 @@ +--source ../include/have_columnstore.inc +--disable_warnings +DROP DATABASE IF EXISTS mcol_5675; +--enable_warnings +CREATE DATABASE mcol_5675; +USE mcol_5675; + +# Exercise safe string-to-numeric conversions on ColumnStore string columns +# Ensure NULLs and empty strings do not crash and evaluate to numeric 0 +CREATE TABLE t_varchar ( + v VARCHAR(10) NULL +) ENGINE=COLUMNSTORE; + +INSERT INTO t_varchar (v) VALUES + (NULL), + (''), + ('0'), + ('1'), + ('123'), + ('-5'), + ('3.14'), + ('abc'); + +# Boolean context evaluation (uses internal atoi/atof conversions) +SELECT v IS NULL AS is_null, IF(v, 1, 0) AS bool_from_v +FROM t_varchar +ORDER BY v IS NULL, v; + +# Arithmetic with implicit numeric conversion +SELECT v, v + 0 AS plus0_int, v + 0.0 AS plus0_float +FROM t_varchar +ORDER BY v IS NULL, v; + +# Rounding path via floating conversion +SELECT v, ROUND(v + 0.0, 0) AS rounded +FROM t_varchar +ORDER BY v IS NULL, v; + +# ColumnStore supports '<=>' in predicates (WHERE). Validate behavior +SELECT v FROM t_varchar WHERE v <=> '0' ORDER BY v; +SELECT v FROM t_varchar WHERE v <=> 123 ORDER BY v; +SELECT v FROM t_varchar WHERE v <=> '' ORDER BY v; +SELECT v FROM t_varchar WHERE v <=> NULL ORDER BY v IS NULL, v; + + +# cleanup +DROP DATABASE mcol_5675; diff --git a/utils/common/collation.h b/utils/common/collation.h index e5781b8d9c..746224d71b 100644 --- a/utils/common/collation.h +++ b/utils/common/collation.h @@ -116,12 +116,16 @@ class MariaDBHasher } MariaDBHasher& add(CHARSET_INFO* cs, const char* str, size_t length) { - cs->hash_sort((const uchar*)str, length, &mPart1, &mPart2); + // my_hash_sort expects a valid pointer even if length == 0. Substitute empty string for nullptr. + const char* safe = str ? str : ""; + cs->hash_sort((const uchar*)safe, length, &mPart1, &mPart2); return *this; } MariaDBHasher& add(CHARSET_INFO* cs, const utils::ConstString& str) { - return add(cs, str.str(), str.length()); + // utils::ConstString guarantees length==0 when pointer is nullptr. + // Still, ensure non-null pointer is passed to hash_sort. + return add(cs, str.str() ? str.str() : "", str.length()); } uint32_t finalize() const {