@@ -949,3 +949,37 @@ it(`Test splitText2Chunks 13 - Table split with empty lastText`, () => {
949949
950950 expect ( chunks ) . toEqual ( mock . result ) ;
951951} ) ;
952+
// Regression test for Issue #5770: lastText must be preserved once every
// splitting strategy has been exhausted.
it(`Test splitText2Chunks 14 - lastText not lost when strategies exhausted`, () => {
  // Background: the reported bug was that the `step >= stepReges.length`
  // branch returned `[text]` and ignored lastText, so pending text could
  // vanish when the splitter fell back to forced character-based splitting.

  // Input with no usable split points (no punctuation, no newlines), so the
  // splitter has to run through all of its strategies.
  const input = 'A'.repeat(1800);
  const chunkSize = 500;

  const { chunks, chars } = splitText2Chunks({
    text: input,
    chunkSize,
    overlapRatio: 0
  });

  // Key check: nothing is lost. The concatenated chunks must be exactly as
  // long as the input, regardless of whether the text ends up as a single
  // chunk or several. A dropped lastText would make this shorter.
  const rejoined = chunks.join('');
  expect(rejoined.length).toBe(input.length);

  // The reported character count must match the input length as well.
  expect(chars).toBe(input.length);

  // No chunk may be empty.
  for (const chunk of chunks) {
    expect(chunk.length).toBeGreaterThan(0);
  }
});
0 commit comments