-
Notifications
You must be signed in to change notification settings - Fork 5k
fix: SIGSEGV in fillVgroupDataCxt when blob and non-blob tables share vgroup #35261
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -543,11 +543,11 @@ static int32_t fillVgroupDataCxt(STableDataCxt* pTableCxt, SVgroupDataCxt* pVgCx | |
| if (pVgCxt->pData->aSubmitTbData == NULL) { | ||
| return terrno; | ||
| } | ||
| if (pTableCxt->hasBlob) { | ||
| pVgCxt->pData->aSubmitBlobData = taosArrayInit(128, sizeof(SBlobSet*)); | ||
| if (NULL == pVgCxt->pData->aSubmitBlobData) { | ||
| return terrno; | ||
| } | ||
| } | ||
| if (pTableCxt->hasBlob && NULL == pVgCxt->pData->aSubmitBlobData) { | ||
| pVgCxt->pData->aSubmitBlobData = taosArrayInit(128, sizeof(SBlobSet*)); | ||
| if (NULL == pVgCxt->pData->aSubmitBlobData) { | ||
| return terrno; | ||
| } | ||
| } | ||
|
Comment on lines
+547
to
552
|
||
|
|
||
|
|
@@ -711,11 +711,11 @@ int32_t checkAndMergeSVgroupDataCxtByTbname(STableDataCxt* pTbCtx, SVgroupDataCx | |
| if (NULL == pVgCxt->pData->aSubmitTbData) { | ||
| return terrno; | ||
| } | ||
| if (pTbCtx->hasBlob) { | ||
| pVgCxt->pData->aSubmitBlobData = taosArrayInit(128, sizeof(SBlobSet*)); | ||
| if (pVgCxt->pData->aSubmitBlobData == NULL) { | ||
| return terrno; | ||
| } | ||
| } | ||
| if (pTbCtx->hasBlob && NULL == pVgCxt->pData->aSubmitBlobData) { | ||
| pVgCxt->pData->aSubmitBlobData = taosArrayInit(128, sizeof(SBlobSet*)); | ||
| if (pVgCxt->pData->aSubmitBlobData == NULL) { | ||
| return terrno; | ||
| } | ||
| } | ||
|
Comment on lines
+715
to
720
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to the issue in if (pTbCtx->hasBlob && NULL == pVgCxt->pData->aSubmitBlobData) {
pVgCxt->pData->aSubmitBlobData = taosArrayInit(128, sizeof(SBlobSet*));
if (pVgCxt->pData->aSubmitBlobData == NULL) {
return terrno;
}
int32_t n = taosArrayGetSize(pVgCxt->pData->aSubmitTbData);
for (int32_t i = 0; i < n; ++i) {
SBlobSet* pNull = NULL;
if (NULL == taosArrayPush(pVgCxt->pData->aSubmitBlobData, &pNull)) {
return terrno;
}
}
}
Comment on lines
+715
to
720
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,142 @@ | ||
| from new_test_framework.utils import tdLog, tdSql, sc, clusterComCheck | ||
|
|
||
|
|
||
| class TestWriteBlobMultiTable: | ||
| """Regression test for SIGSEGV in fillVgroupDataCxt when aSubmitBlobData is NULL. | ||
|
|
||
| Bug: parInsertUtil.c fillVgroupDataCxt() crashed because aSubmitBlobData | ||
| was only initialized when the FIRST table in a vgroup had hasBlob=true. | ||
|
|
||
| Minimal reproduction: Create a database with vgroups=1, then create two | ||
| supertables (one WITHOUT blob, one WITH blob). Their child tables share | ||
| the same vgroup (vgId=1). When insMergeTableDataCxt groups tables by vgId, | ||
| a non-blob child table processed first leaves aSubmitBlobData=NULL, then | ||
| a blob child table crashes on taosArrayPush(NULL, ...). | ||
| """ | ||
|
|
||
| def setup_class(cls): | ||
| tdLog.debug(f"start to execute {__file__}") | ||
|
|
||
| def test_blob_crash_vgroup_shared(self): | ||
| """Two supertables (with/without blob) share one vgroup -> crash | ||
|
|
||
| 1. Create database with vgroups=1 | ||
| 2. Create supertable WITHOUT blob | ||
| 3. Create supertable WITH blob | ||
| 4. Multi-table INSERT into both supertables' child tables | ||
| 5. Both sets of child tables land in same vgId -> triggers crash | ||
| if non-blob table is processed first in hash iteration | ||
|
|
||
| Since: v3.3.8.0 | ||
| Labels: common,ci,blob | ||
| """ | ||
| dbname = "blb1" | ||
| tdSql.execute(f"create database if not exists {dbname} vgroups 1") | ||
| tdSql.execute(f"use {dbname}") | ||
|
|
||
| # Supertable WITHOUT blob | ||
| tdSql.execute( | ||
| "create stable if not exists st_noblob (ts timestamp, val int) tags (t1 int)" | ||
| ) | ||
| # Supertable WITH blob | ||
| tdSql.execute( | ||
| "create stable if not exists st_blob (ts timestamp, val int, data blob) tags (t1 int)" | ||
| ) | ||
|
|
||
| # Multi-table INSERT: mix child tables from both supertables | ||
| # All land in vgId=1 (vgroups=1) | ||
| # Insert multiple non-blob tables to increase probability they are | ||
| # processed first in hash iteration | ||
| sql = ( | ||
|
Comment on lines
+46
to
+50
|
||
| "insert into " | ||
| "n1 using st_noblob tags(1) values ('2024-01-01 00:00:00', 1) " | ||
| "n2 using st_noblob tags(2) values ('2024-01-01 00:00:01', 2) " | ||
| "n3 using st_noblob tags(3) values ('2024-01-01 00:00:02', 3) " | ||
| "b1 using st_blob tags(1) values ('2024-01-01 00:00:03', 4, 'blob_1') " | ||
| "n4 using st_noblob tags(4) values ('2024-01-01 00:00:04', 5) " | ||
| "b2 using st_blob tags(2) values ('2024-01-01 00:00:05', 6, 'blob_2') " | ||
| ) | ||
| tdSql.execute(sql) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is recommended to verify the actual content of the blob columns in the test case. Given the logic error in the current PR regarding array synchronization, blob data might be incorrectly associated with the wrong table or row. Verifying only the row count is insufficient to catch such data corruption issues. |
||
|
|
||
| # Verify data | ||
| tdSql.query("select count(*) from st_noblob") | ||
| tdSql.checkData(0, 0, 4) | ||
|
|
||
| tdSql.query("select count(*) from st_blob") | ||
| tdSql.checkData(0, 0, 2) | ||
|
|
||
| tdSql.query("select * from st_blob order by ts") | ||
| for i in range(tdSql.getRows()): | ||
| tdLog.info(f"row {i}: {tdSql.getData(i, 0)}, {tdSql.getData(i, 1)}, {tdSql.getData(i, 2)}") | ||
|
|
||
| tdSql.query("select * from st_noblob order by ts") | ||
| for i in range(tdSql.getRows()): | ||
| tdLog.info(f"row {i}: {tdSql.getData(i, 0)}, {tdSql.getData(i, 1)}") | ||
|
Comment on lines
+34
to
+74
|
||
|
|
||
| def test_blob_crash_more_tables(self): | ||
| """More tables to ensure hash iteration hits non-blob first | ||
|
|
||
| Same as above but with many more non-blob child tables to make | ||
| it almost certain that a non-blob table is processed before any | ||
| blob table in the hash iteration. | ||
|
|
||
| Since: v3.3.8.0 | ||
| Labels: common,ci,blob | ||
| """ | ||
| dbname = "blb2" | ||
| tdSql.execute(f"create database if not exists {dbname} vgroups 1") | ||
| tdSql.execute(f"use {dbname}") | ||
|
|
||
| tdSql.execute( | ||
| "create stable if not exists st_noblob (ts timestamp, val int) tags (t1 int)" | ||
| ) | ||
| tdSql.execute( | ||
| "create stable if not exists st_blob (ts timestamp, val int, data blob) tags (t1 int)" | ||
| ) | ||
|
|
||
| # 50 non-blob child tables + 10 blob child tables | ||
| parts = [] | ||
| for i in range(50): | ||
| parts.append( | ||
| f"n{i} using st_noblob tags({i}) values " | ||
| f"({1700000000000 + i}, {i})" | ||
| ) | ||
| for i in range(10): | ||
| parts.append( | ||
| f"b{i} using st_blob tags({i}) values " | ||
| f"({1700000001000 + i}, {i}, 'blob_{i}')" | ||
| ) | ||
| sql = "insert into " + " ".join(parts) | ||
| tdSql.execute(sql) | ||
|
|
||
| tdSql.query("select count(*) from st_noblob") | ||
| tdSql.checkData(0, 0, 50) | ||
|
|
||
| tdSql.query("select count(*) from st_blob") | ||
| tdSql.checkData(0, 0, 10) | ||
|
|
||
|
Comment on lines
+87
to
+117
|
||
| def test_blob_crash_normal_table(self): | ||
| """Minimal repro: two normal tables (one int, one blob) in same vgroup | ||
|
|
||
| INSERT INTO t1 VALUES(...) t2 VALUES(...) where t1 has no blob | ||
| and t2 has blob, both in vgroups=1. If t1 is iterated first, | ||
| aSubmitBlobData stays NULL and t2 crashes on taosArrayPush(NULL). | ||
|
|
||
| Since: v3.3.8.0 | ||
| Labels: common,ci,blob | ||
| """ | ||
| dbname = "blb3" | ||
| tdSql.execute(f"drop database if exists {dbname}") | ||
| tdSql.execute(f"create database {dbname} vgroups 1") | ||
| tdSql.execute(f"use {dbname}") | ||
|
|
||
| tdSql.execute("create table t1 (ts timestamp, v int)") | ||
| tdSql.execute("create table t2 (ts timestamp, v blob)") | ||
|
|
||
| tdSql.execute("insert into t1 values (now, 1) t2 values (now, 'hello')") | ||
|
|
||
| tdSql.query("select count(*) from t1") | ||
| tdSql.checkData(0, 0, 1) | ||
|
|
||
| tdSql.query("select count(*) from t2") | ||
| tdSql.checkData(0, 0, 1) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moving the initialization of
aSubmitBlobDataout of the guard fixes the SIGSEGV, but it introduces a logic error regarding the synchronization betweenaSubmitTbDataandaSubmitBlobData. These two arrays must maintain a 1-to-1 correspondence because they are iterated together using the same index in functions likeinsResetBlob(line 1132).If
aSubmitBlobDatais initialized lazily, it will be missing entries for any non-blob tables already present inaSubmitTbData. Additionally, the current push logic (lines 561-567) only adds entries toaSubmitBlobDatafor tables wherehasBlobis true. This means any non-blob tables added afteraSubmitBlobDatais initialized will also cause the arrays to go out of sync.To fix this properly, you should:
aSubmitBlobDatawithNULLpointers for existing tables when it is lazily initialized.aSubmitBlobDatafor every table if the array has been initialized, even if the table has no blob.