@@ -5708,7 +5708,7 @@ void TextPage::dump(GBool noLineNumbers, GBool fullFontName, vector<bool> lineNu
5708
5708
paragraph->setXMax (paragraph->getXMin () + maxBlockLineWidth);
5709
5709
5710
5710
// adding previous block to the page element
5711
- if (readingOrder)
5711
+ if (readingOrder && num == 1 )
5712
5712
lastBlockInserted = addBlockInReadingOrder (paragraph, lineFontSize, lastBlockInserted);
5713
5713
else
5714
5714
blocks->append (paragraph);
@@ -5778,7 +5778,7 @@ void TextPage::dump(GBool noLineNumbers, GBool fullFontName, vector<bool> lineNu
5778
5778
paragraph->setYMax (paragraph->getYMin () + blockHeight);
5779
5779
5780
5780
// adding previous block to the page element
5781
- if (readingOrder)
5781
+ if (readingOrder && num == 1 )
5782
5782
lastBlockInserted = addBlockInReadingOrder (paragraph, lineFontSize, lastBlockInserted);
5783
5783
else
5784
5784
blocks->append (paragraph);
@@ -5811,7 +5811,7 @@ void TextPage::dump(GBool noLineNumbers, GBool fullFontName, vector<bool> lineNu
5811
5811
endPage = gFalse ;
5812
5812
5813
5813
if (paragraph != NULL ) {
5814
- if (readingOrder)
5814
+ if (readingOrder && num == 1 )
5815
5815
lastBlockInserted = addBlockInReadingOrder (paragraph, lineFontSize, lastBlockInserted);
5816
5816
else
5817
5817
blocks->append (paragraph);
@@ -6353,100 +6353,131 @@ void TextPage::dump(GBool noLineNumbers, GBool fullFontName, vector<bool> lineNu
6353
6353
}
6354
6354
6355
6355
// PL: Insert a block in the page's block list according to the reading order
6356
- GBool TextPage::addBlockInReadingOrder (TextParagraph * block, double fontSize, GBool lastInserted) {
6356
+ // lastInserted: true if the previously added block has been inserted and not appended
6357
+ GBool TextPage::addBlockInReadingOrder (TextParagraph *block, double fontSize, GBool lastInserted) {
6357
6358
// if Y_pos of the block to be inserted is less than Y_pos of the existing block
6358
6359
// (i.e. block is located above)
6359
6360
// and, in case of vertical overlap,
6360
6361
// X_pos + width of the block to be inserted is less than X_pos of this existing block
6361
6362
// (i.e. block is on the left and the surfaces of the block are not overlaping -
6362
6363
// 2 columns case)
6363
6364
// then the block order is before the existing block
6364
- GBool notInserted = gTrue ;
6365
- int indexLowerBlock = 0 , insertIndex= 0 ;
6365
+ // GBool insertable = gFalse;
6366
+ int indexLowerBlock = 0 , indexUpperBlock = 0 ;
6367
+ // default insert place is at the end of the block list, so append
6368
+ int insertIndex = blocks->getLength ();
6366
6369
GBool firstLowerBlock = gFalse ;
6367
- GBool noVerticalOverlap = gTrue ;
6370
+ GBool firstUpperBlock = gFalse ;
6371
+ GBool verticalOverlap = gFalse ;
6372
+ double page_middle = pageWidth/2 ;
6368
6373
// we get the first child of the current page node
6369
6374
unsigned long nbChildren = blocks->getLength ();
6370
6375
if (nbChildren > 0 ) {
6371
- // coordinates of the block to be inserted
6372
- double blockX = block->getXMin ();
6373
- double blockY = block->getYMin ();
6374
-
6375
- double blockHeight = block->getYMax () - block->getYMin ();
6376
- double blockWidth = block->getXMax () - block->getXMin ();
6377
-
6378
- // cout << "to be inserted: " << nodeblock->name << ", X: " << blockX << ", Y: " << blockY << ", H: " << blockHeight << ", W: " << blockWidth << endl;
6379
-
6380
- TextParagraph * par;
6381
- // we get all the block nodes in the XML tree corresponding to the page
6382
- for (int i = 0 ; i <= blocks->getLength ()-1 && notInserted; i++) {
6383
- par = (TextParagraph *) blocks->get (i);
6384
- double currentY = par->getYMin ();
6385
6376
6386
- double currentX = par->getXMin ();
6387
-
6388
- double currentWidth = par->getXMax () - par->getXMin ();
6389
- double currentHeight = par->getYMax () - par->getXMin ();
6390
-
6391
- if ((currentY <= blockY && currentY + currentHeight >= blockY) ||
6392
- (blockY + blockHeight > currentY && blockY + blockHeight < currentY + currentHeight)){
6393
- noVerticalOverlap = gFalse ;
6377
+ if (block->getXMax () == 0 || block->getYMax () == 0 ) {
6378
+ double maxLineWidth = 0.0 ;
6379
+ double maxLineY = 0.0 ;
6380
+ // fix missing xmax or ymax at block level (usually single line block)
6381
+ for (int lineIdx = 0 ; lineIdx < block->lines ->getLength (); lineIdx++) {
6382
+ TextLine *line = (TextLine *) block->lines ->get (lineIdx);
6383
+ if (line->getYMax () > maxLineY)
6384
+ maxLineY = line->getYMax ();
6385
+ if (line->getXMax () - line->getXMin () > maxLineWidth)
6386
+ maxLineWidth = line->getXMax () - line->getXMin ();
6394
6387
}
6388
+ if (block->getXMax () == 0 )
6389
+ block->setXMax (block->getXMin () + maxLineWidth);
6390
+ if (block->getYMax () == 0 )
6391
+ block->setYMax (maxLineY);
6392
+ }
6395
6393
6396
- if (currentY < blockY)
6397
- continue ;
6398
-
6399
- if (blockY < currentY) {
6400
- if (blockY + blockHeight < currentY) {
6394
+ // coordinates of the block to be inserted
6395
+ double x = block->getXMin ();
6396
+ double y = block->getYMin ();
6397
+ double w = block->getXMax () - block->getXMin ();
6398
+ double h = block->getYMax () - block->getYMin ();
6399
+
6400
+ // cout << "to be inserted: " << " X: " << x << ", Y: " << y << ", H: " << h << ", W: " << w << ", X_max: " << block->getXMax() << ", Y_max: " << block->getYMax() << endl;
6401
+
6402
+ // check if the block is centered on the page
6403
+ GBool centered = gFalse ;
6404
+ if ( (block->getXMin () < page_middle) &&
6405
+ (block->getXMax () > page_middle) ) {
6406
+ int left_size = page_middle - block->getXMin ();
6407
+ int right_size = block->getXMax () - page_middle;
6408
+ // cout << "centered: " << std::abs(left_size-right_size) << endl;
6409
+ if (std::abs (left_size-right_size) < 20 ) {
6410
+ centered = gTrue ;
6411
+ }
6412
+ }
6413
+
6414
+ TextParagraph * currentBlock;
6415
+ // we get all the block nodes corresponding to the page, starting from the lower/last present block
6416
+ // to prioritize the stream order when it is valid
6417
+ // for (int i = 0; i < blocks->getLength(); i++) {
6418
+ for (int i = blocks->getLength ()-1 ; i >= 0 ; i--) {
6419
+ currentBlock = (TextParagraph *)blocks->get (i);
6420
+ double c_x = currentBlock->getXMin ();
6421
+ double c_y = currentBlock->getYMin ();
6422
+ double c_w = currentBlock->getXMax () - currentBlock->getXMin ();
6423
+ double c_h = currentBlock->getYMax () - currentBlock->getYMin ();
6424
+
6425
+ // cout << "current: " << " X: " << c_x << ", Y: " << c_y << ", H: " << c_h << ", W: " << c_w << endl;
6426
+
6427
+ if (y > c_y) {
6428
+ // if block is centered in the middle of the line, we don't consider column constraints
6429
+ if (centered) {
6430
+ break ;
6431
+ } else if (x+w < c_x) {
6432
+ // although lower, block to be added is entirely on the left of current,
6433
+ // so before in reading order
6434
+ insertIndex = i;
6435
+ } else {
6436
+ break ;
6437
+ }
6438
+ }
6401
6439
6402
- if (!notInserted)
6403
- continue ;
6404
- // we keep the first block under it, if no overlap put it above
6405
- if (!firstLowerBlock) {
6406
- indexLowerBlock = i;
6407
- firstLowerBlock = gTrue ;
6408
- }
6409
- // we don't have any vertical overlap
6410
- // check the X-pos, the block cannot be on the right of the current block
6411
- // check if the
6412
- if ((blockX <= currentX + currentWidth && blockX >= currentX) ||
6413
- (blockX <= currentX + currentWidth && blockX + blockWidth > currentX)||
6414
- blockX < currentX + currentWidth +fontSize * maxColSpacing
6415
- ) {
6416
- // we can insert the block before the current block
6417
- insertIndex = i;
6418
- notInserted = false ;
6419
- }
6420
- } else
6421
- noVerticalOverlap = gFalse ;
6440
+ if (
6441
+ ((y <= c_y+c_h) && (y+h > c_y+c_h)) ||
6442
+ ((y <= c_y) && (y+h > c_y))
6443
+ ) {
6444
+ verticalOverlap = gTrue ;
6422
6445
}
6423
- // we have vertical overlap, check position on X axis
6424
6446
6425
- /* double currentHeight = 0;
6426
- attrValue = xmlGetProp(cur_node, (const xmlChar*)ATTR_HEIGHT);
6427
- if (attrValue != NULL) {
6428
- currentHeight = atof((const char*)attrValue);
6429
- xmlFree(attrValue);
6447
+ if ((y+h < c_y) || verticalOverlap ) {
6448
+ // we are entirely above current block, no vertical overlap
6449
+ // so we might want to insert the block just above it
6450
+ // we need to check the column and general horizontal constraints
6451
+
6452
+ // if block is centered in the middle of the line, we don't consider column constraints
6453
+
6454
+ if (centered) {
6455
+ insertIndex = i;
6456
+ } else if ((x > c_x + c_w) && (x+w < (pageWidth*1.2 )/2 ) ) {
6457
+ // the block to be inserted is on the right of the current block
6458
+ // block is after current, we stop going up
6459
+ break ;
6460
+ } else
6461
+ insertIndex = i;
6462
+
6463
+ // we don't have any vertical overlap
6464
+ // check the X-pos, the block cannot be on the right of the current block
6465
+ // check if the
6466
+ /* if ((x <= c_x+c_w && x >= c_x) ||
6467
+ (x <= c_x+c_w && x+w > c_x) ||
6468
+ x < c_x+c_w + fontSize * maxColSpacing
6469
+ ) {
6470
+ // we can insert the block before the current block
6471
+ insertIndex = i;
6472
+ insertable = gTrue;
6473
+ break;
6430
6474
}*/
6475
+ }
6431
6476
}
6432
- if ((lastInserted || noVerticalOverlap) && firstLowerBlock){
6433
- insertIndex = indexLowerBlock;
6434
- notInserted = false ;
6435
- }
6436
- /* if (notInserted && (blockX + blockWidth < currentX)) {
6437
- // does not work for multi column sections one after the other
6438
- xmlNodePtr result = xmlAddPrevSibling(cur_node, nodeblock);
6439
- notInserted = false;
6440
- }*/
6441
6477
}
6442
6478
6443
- if (notInserted) {
6444
- blocks->append (block);
6445
- return gFalse ;
6446
- } else {
6447
- blocks->insert (insertIndex, block); // beware, the order can be the opposite if next block in next column..
6448
- return gTrue ;
6449
- }
6479
+ blocks->insert (insertIndex, block);
6480
+ return insertIndex;
6450
6481
}
6451
6482
6452
6483
void TextPage::addImageInlineNode (xmlNodePtr nodeline,
0 commit comments