Skip to content

Commit 0c6d836

Browse files
committed
fix: text split code (#5773)
* fix: toolresponse result * remove log * stream remove * fix: text split code
1 parent b4be507 commit 0c6d836

File tree

5 files changed

+47
-7
lines changed

5 files changed

+47
-7
lines changed

document/content/docs/upgrading/4-13/4132.mdx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ description: 'FastGPT V4.13.2 更新说明'
1717

1818
1. LLM 模型默认支持图片,导致请求错误。
1919
2. Mongo 多副本切换时候,watch 未重新触发。
20+
3. 文本分块,所有策略用完后,未处理 lastText 数据。
2021

2122
## 🔨 插件更新
2223

document/data/doc-last-modified.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@
113113
"document/content/docs/upgrading/4-12/4124.mdx": "2025-09-17T22:29:56+08:00",
114114
"document/content/docs/upgrading/4-13/4130.mdx": "2025-09-30T16:00:10+08:00",
115115
"document/content/docs/upgrading/4-13/4131.mdx": "2025-09-30T15:47:06+08:00",
116-
"document/content/docs/upgrading/4-13/4132.mdx": "2025-10-14T15:27:49+08:00",
116+
"document/content/docs/upgrading/4-13/4132.mdx": "2025-10-17T13:58:27+08:00",
117117
"document/content/docs/upgrading/4-8/40.mdx": "2025-08-02T19:38:37+08:00",
118118
"document/content/docs/upgrading/4-8/41.mdx": "2025-08-02T19:38:37+08:00",
119119
"document/content/docs/upgrading/4-8/42.mdx": "2025-08-02T19:38:37+08:00",

packages/global/common/string/textSplitter.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -295,17 +295,21 @@ const commonSplit = (props: SplitProps): SplitResponse => {
295295
const isMarkdownStep = checkIsMarkdownSplit(step);
296296
const isCustomStep = checkIsCustomStep(step);
297297
const forbidConcat = isCustomStep; // forbid=true时候,lastText肯定为空
298-
const textLength = getTextValidLength(text);
299298

300299
// Over step
301300
if (step >= stepReges.length) {
302-
if (textLength < maxSize) {
303-
return [text];
301+
// Merge lastText with current text to prevent data loss
302+
const combinedText = lastText + text;
303+
const combinedLength = getTextValidLength(combinedText);
304+
305+
if (combinedLength < maxSize) {
306+
return [combinedText];
304307
}
305308
// use slice-chunkSize to split text
309+
// Note: Use combinedText.length for slicing, not combinedLength
306310
const chunks: string[] = [];
307-
for (let i = 0; i < textLength; i += chunkSize - overlapLen) {
308-
chunks.push(text.slice(i, i + chunkSize));
311+
for (let i = 0; i < combinedText.length; i += chunkSize - overlapLen) {
312+
chunks.push(combinedText.slice(i, i + chunkSize));
309313
}
310314
return chunks;
311315
}

projects/app/src/service/common/system/volumnMongoWatch.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import { getAppTemplatesAndLoadThem } from '@fastgpt/service/core/app/templates/
77
import { watchSystemModelUpdate } from '@fastgpt/service/core/ai/config/utils';
88
import { SystemConfigsTypeEnum } from '@fastgpt/global/common/system/config/constants';
99

10-
const changeStreams: any[] = [];
10+
let changeStreams: any[] = [];
1111

1212
export const startMongoWatch = async () => {
1313
cleanupMongoWatch();
@@ -56,4 +56,5 @@ const cleanupMongoWatch = () => {
5656
changeStreams.forEach((changeStream) => {
5757
changeStream?.close();
5858
});
59+
changeStreams = [];
5960
};

test/cases/global/common/string/textSplitter.test.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -949,3 +949,37 @@ it(`Test splitText2Chunks 13 - Table split with empty lastText`, () => {
949949

950950
expect(chunks).toEqual(mock.result);
951951
});
952+
953+
// Test for lastText handling when all strategies exhausted (Issue #5770)
954+
it(`Test splitText2Chunks 14 - lastText not lost when strategies exhausted`, () => {
955+
// This test verifies that when all splitting strategies are exhausted
956+
// and forced character-based splitting occurs, lastText is not lost.
957+
// The bug was: step >= stepReges.length returned [text] ignoring lastText
958+
959+
const mock = {
960+
// Create text with NO good split points (no punctuation, newlines, etc.)
961+
// This forces the algorithm to exhaust all strategies
962+
text: 'A'.repeat(1800),
963+
chunkSize: 500
964+
};
965+
966+
const { chunks, chars } = splitText2Chunks({
967+
text: mock.text,
968+
chunkSize: mock.chunkSize,
969+
overlapRatio: 0
970+
});
971+
972+
// Critical test: No data loss - total characters in chunks should equal input
973+
// This would fail with the bug because lastText would be dropped
974+
// Even if the text is not split (treated as one chunk), data should not be lost
975+
const totalCharsInChunks = chunks.join('').length;
976+
expect(totalCharsInChunks).toBe(mock.text.length);
977+
978+
// Also verify the chars count is correct
979+
expect(chars).toBe(mock.text.length);
980+
981+
// Verify no chunk is empty
982+
chunks.forEach((chunk) => {
983+
expect(chunk.length).toBeGreaterThan(0);
984+
});
985+
});

0 commit comments

Comments
 (0)