Skip to content

Commit 12a6488

Browse files
committed
iterate on reading order
1 parent d0c8739 commit 12a6488

File tree

1 file changed

+107
-76
lines changed

1 file changed

+107
-76
lines changed

src/XmlAltoOutputDev.cc

+107-76
Original file line number | Diff line number | Diff line change
@@ -5708,7 +5708,7 @@ void TextPage::dump(GBool noLineNumbers, GBool fullFontName, vector<bool> lineNu
57085708
paragraph->setXMax(paragraph->getXMin() + maxBlockLineWidth);
57095709

57105710
// adding previous block to the page element
5711-
if(readingOrder)
5711+
if(readingOrder && num == 1)
57125712
lastBlockInserted = addBlockInReadingOrder(paragraph, lineFontSize, lastBlockInserted);
57135713
else
57145714
blocks->append(paragraph);
@@ -5778,7 +5778,7 @@ void TextPage::dump(GBool noLineNumbers, GBool fullFontName, vector<bool> lineNu
57785778
paragraph->setYMax(paragraph->getYMin() + blockHeight);
57795779

57805780
// adding previous block to the page element
5781-
if(readingOrder)
5781+
if(readingOrder && num == 1)
57825782
lastBlockInserted = addBlockInReadingOrder(paragraph, lineFontSize, lastBlockInserted);
57835783
else
57845784
blocks->append(paragraph);
@@ -5811,7 +5811,7 @@ void TextPage::dump(GBool noLineNumbers, GBool fullFontName, vector<bool> lineNu
58115811
endPage = gFalse;
58125812

58135813
if (paragraph != NULL) {
5814-
if(readingOrder)
5814+
if(readingOrder && num == 1)
58155815
lastBlockInserted = addBlockInReadingOrder(paragraph, lineFontSize, lastBlockInserted);
58165816
else
58175817
blocks->append(paragraph);
@@ -6353,100 +6353,131 @@ void TextPage::dump(GBool noLineNumbers, GBool fullFontName, vector<bool> lineNu
63536353
}
63546354

63556355
// PL: Insert a block in the page's block list according to the reading order
6356-
GBool TextPage::addBlockInReadingOrder(TextParagraph * block, double fontSize, GBool lastInserted) {
6356+
// lastInserted: true if the previously added block has been inserted and not appended
6357+
GBool TextPage::addBlockInReadingOrder(TextParagraph *block, double fontSize, GBool lastInserted) {
63576358
// if Y_pos of the block to be inserted is less than Y_pos of the existing block
63586359
// (i.e. block is located above)
63596360
// and, in case of vertical overlap,
63606361
// X_pos + width of the block to be inserted is less than X_pos of this existing block
63616362
// (i.e. block is on the left and the surfaces of the block are not overlapping -
63626363
// 2 columns case)
63636364
// then the block order is before the existing block
6364-
GBool notInserted = gTrue;
6365-
int indexLowerBlock = 0, insertIndex= 0;
6365+
//GBool insertable = gFalse;
6366+
int indexLowerBlock = 0, indexUpperBlock = 0;
6367+
// default insert place is at the end of the block list, so append
6368+
int insertIndex = blocks->getLength();
63666369
GBool firstLowerBlock = gFalse;
6367-
GBool noVerticalOverlap = gTrue;
6370+
GBool firstUpperBlock = gFalse;
6371+
GBool verticalOverlap = gFalse;
6372+
double page_middle = pageWidth/2;
63686373
// we get the first child of the current page node
63696374
unsigned long nbChildren = blocks->getLength();
63706375
if (nbChildren > 0) {
6371-
// coordinates of the block to be inserted
6372-
double blockX = block->getXMin();
6373-
double blockY = block->getYMin();
6374-
6375-
double blockHeight = block->getYMax() - block->getYMin();
6376-
double blockWidth = block->getXMax() - block->getXMin();
6377-
6378-
//cout << "to be inserted: " << nodeblock->name << ", X: " << blockX << ", Y: " << blockY << ", H: " << blockHeight << ", W: " << blockWidth << endl;
6379-
6380-
TextParagraph * par;
6381-
// we get all the block nodes in the XML tree corresponding to the page
6382-
for (int i = 0; i <= blocks->getLength()-1 && notInserted; i++) {
6383-
par = (TextParagraph *) blocks->get(i);
6384-
double currentY = par->getYMin();
63856376

6386-
double currentX = par->getXMin();
6387-
6388-
double currentWidth = par->getXMax() - par->getXMin();
6389-
double currentHeight = par->getYMax() - par->getXMin();
6390-
6391-
if((currentY <= blockY && currentY + currentHeight >= blockY) ||
6392-
(blockY + blockHeight > currentY && blockY + blockHeight < currentY + currentHeight)){
6393-
noVerticalOverlap = gFalse;
6377+
if (block->getXMax() == 0 || block->getYMax() == 0) {
6378+
double maxLineWidth = 0.0;
6379+
double maxLineY = 0.0;
6380+
// fix missing xmax or ymax at block level (usually single line block)
6381+
for (int lineIdx = 0; lineIdx < block->lines->getLength(); lineIdx++) {
6382+
TextLine *line = (TextLine *) block->lines->get(lineIdx);
6383+
if (line->getYMax() > maxLineY)
6384+
maxLineY = line->getYMax();
6385+
if (line->getXMax() - line->getXMin() > maxLineWidth)
6386+
maxLineWidth = line->getXMax() - line->getXMin();
63946387
}
6388+
if (block->getXMax() == 0)
6389+
block->setXMax(block->getXMin() + maxLineWidth);
6390+
if (block->getYMax() == 0)
6391+
block->setYMax(maxLineY);
6392+
}
63956393

6396-
if (currentY < blockY)
6397-
continue;
6398-
6399-
if (blockY < currentY) {
6400-
if (blockY + blockHeight < currentY) {
6394+
// coordinates of the block to be inserted
6395+
double x = block->getXMin();
6396+
double y = block->getYMin();
6397+
double w = block->getXMax() - block->getXMin();
6398+
double h = block->getYMax() - block->getYMin();
6399+
6400+
//cout << "to be inserted: " << " X: " << x << ", Y: " << y << ", H: " << h << ", W: " << w << ", X_max: " << block->getXMax() << ", Y_max: " << block->getYMax() << endl;
6401+
6402+
// check if the block is centered on the page
6403+
GBool centered = gFalse;
6404+
if ( (block->getXMin() < page_middle) &&
6405+
(block->getXMax() > page_middle) ) {
6406+
int left_size = page_middle - block->getXMin();
6407+
int right_size = block->getXMax() - page_middle;
6408+
//cout << "centered: " << std::abs(left_size-right_size) << endl;
6409+
if (std::abs(left_size-right_size) < 20) {
6410+
centered = gTrue;
6411+
}
6412+
}
6413+
6414+
TextParagraph * currentBlock;
6415+
// we get all the block nodes corresponding to the page, starting from the lower/last present block
6416+
// to prioritize the stream order when it is valid
6417+
//for (int i = 0; i < blocks->getLength(); i++) {
6418+
for (int i = blocks->getLength()-1; i >= 0; i--) {
6419+
currentBlock = (TextParagraph *)blocks->get(i);
6420+
double c_x = currentBlock->getXMin();
6421+
double c_y = currentBlock->getYMin();
6422+
double c_w = currentBlock->getXMax() - currentBlock->getXMin();
6423+
double c_h = currentBlock->getYMax() - currentBlock->getYMin();
6424+
6425+
//cout << "current: " << " X: " << c_x << ", Y: " << c_y << ", H: " << c_h << ", W: " << c_w << endl;
6426+
6427+
if (y > c_y) {
6428+
// if block is centered in the middle of the line, we don't consider column constraints
6429+
if (centered) {
6430+
break;
6431+
} else if (x+w < c_x) {
6432+
// although lower, block to be added is entirely on the left of current,
6433+
// so before in reading order
6434+
insertIndex = i;
6435+
} else {
6436+
break;
6437+
}
6438+
}
64016439

6402-
if(!notInserted)
6403-
continue;
6404-
// we keep the first block under it, if no overlap put it above
6405-
if(!firstLowerBlock) {
6406-
indexLowerBlock = i;
6407-
firstLowerBlock = gTrue;
6408-
}
6409-
// we don't have any vertical overlap
6410-
// check the X-pos, the block cannot be on the right of the current block
6411-
// check if the
6412-
if ((blockX <= currentX + currentWidth && blockX >= currentX) ||
6413-
(blockX <= currentX + currentWidth && blockX + blockWidth > currentX)||
6414-
blockX < currentX + currentWidth +fontSize * maxColSpacing
6415-
) {
6416-
// we can insert the block before the current block
6417-
insertIndex = i;
6418-
notInserted = false;
6419-
}
6420-
} else
6421-
noVerticalOverlap = gFalse;
6440+
if (
6441+
((y <= c_y+c_h) && (y+h > c_y+c_h)) ||
6442+
((y <= c_y) && (y+h > c_y))
6443+
) {
6444+
verticalOverlap = gTrue;
64226445
}
6423-
// we have vertical overlap, check position on X axis
64246446

6425-
/*double currentHeight = 0;
6426-
attrValue = xmlGetProp(cur_node, (const xmlChar*)ATTR_HEIGHT);
6427-
if (attrValue != NULL) {
6428-
currentHeight = atof((const char*)attrValue);
6429-
xmlFree(attrValue);
6447+
if ((y+h < c_y) || verticalOverlap ) {
6448+
// we are entirely above the current block, or we overlap it vertically,
6449+
// so we might want to insert the block just above it
6450+
// we need to check the column and general horizontal constraints
6451+
6452+
// if block is centered in the middle of the line, we don't consider column constraints
6453+
6454+
if (centered) {
6455+
insertIndex = i;
6456+
} else if ((x > c_x + c_w) && (x+w < (pageWidth*1.2)/2) ) {
6457+
// the block to be inserted is on the right of the current block
6458+
// block is after current, we stop going up
6459+
break;
6460+
} else
6461+
insertIndex = i;
6462+
6463+
// we don't have any vertical overlap
6464+
// check the X-pos, the block cannot be on the right of the current block
6465+
// check if the
6466+
/*if ((x <= c_x+c_w && x >= c_x) ||
6467+
(x <= c_x+c_w && x+w > c_x) ||
6468+
x < c_x+c_w + fontSize * maxColSpacing
6469+
) {
6470+
// we can insert the block before the current block
6471+
insertIndex = i;
6472+
insertable = gTrue;
6473+
break;
64306474
}*/
6475+
}
64316476
}
6432-
if((lastInserted || noVerticalOverlap) && firstLowerBlock){
6433-
insertIndex = indexLowerBlock;
6434-
notInserted = false;
6435-
}
6436-
/*if (notInserted && (blockX + blockWidth < currentX)) {
6437-
// does not work for multi column sections one after the other
6438-
xmlNodePtr result = xmlAddPrevSibling(cur_node, nodeblock);
6439-
notInserted = false;
6440-
}*/
64416477
}
64426478

6443-
if (notInserted) {
6444-
blocks->append(block);
6445-
return gFalse;
6446-
} else {
6447-
blocks->insert(insertIndex, block); // beware, the order can be the opposite if next block in next column..
6448-
return gTrue;
6449-
}
6479+
blocks->insert(insertIndex, block);
6480+
return insertIndex;
64506481
}
64516482

64526483
void TextPage::addImageInlineNode(xmlNodePtr nodeline,

0 commit comments

Comments
 (0)