Skip to content

Commit ad51394

Browse files
authored
Revert "Changes to make generated xmls valid according to alto 3.1 schema."
1 parent 8970d47 commit ad51394

File tree

5 files changed

+21
-65
lines changed

5 files changed

+21
-65
lines changed

src/ConstantsXML.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ namespace ConstantsXML {
44
// All tags ALTO XML dialect
55
const char *TAG_ALTO = "alto";
66

7-
const char *ALTO_URI = "http://www.loc.gov/standards/alto/ns-v3#";
7+
const char *ALTO_URI = "http://www.loc.gov/standards/alto/v3/alto.xsd";
88

99
const char *TAG_DESCRIPTION = "Description";
1010
const char *TAG_MEASUREMENTUNIT = "MeasurementUnit";

src/Parameters.cc

-6
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,6 @@ void Parameters::setReadingOrder(GBool readingOrders) {
7171
unlockGlobalParams;
7272
}
7373

74-
void Parameters::setCharReadingOrderAttr(GBool charReadingOrderAttrs) {
75-
lockGlobalParams;
76-
charReadingOrderAttr = charReadingOrderAttrs;
77-
unlockGlobalParams;
78-
}
79-
8074
void Parameters::setOcr(GBool ocrA) {
8175
lockGlobalParams;
8276
ocr = ocrA;

src/Parameters.h

-14
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,6 @@ class Parameters {
7272
*/
7373
GBool getReadingOrder() {return readingOrder;}
7474

75-
/** PL: Return the boolean that controls whether to include TYPE attributes to String elements to
76-
* indicate right-to-left reading order (produces non-valid ALTO)
77-
* @return <code>true</code> if the charReadingOrderAttr option is selected, <code>false</code> otherwise
78-
*/
79-
GBool getCharReadingOrderAttr() {return charReadingOrderAttr;}
80-
8175
/** Return a boolean which indicates whether OCR should be applied to recognize non-Unicode glyphs
8276
* @return <code>true</code> if the ocr option is selected, <code>false</code> otherwise
8377
*/
@@ -128,12 +122,6 @@ class Parameters {
128122
*/
129123
void setReadingOrder(GBool readingOrders);
130124

131-
/** PL: Modify the boolean that controls whether to include TYPE attributes to String elements to indicate
132-
* right-to-left reading order (produces non-valid ALTO)
133-
* @param charReadingOrderAttr <code>true</code> if the charReadingOrderAttr option is selected, <code>false</code> otherwise
134-
*/
135-
void setCharReadingOrderAttr(GBool charReadingOrderAttrs);
136-
137125
/** Modify the boolean which indicates whether OCR should be applied or not
138126
* @param ocrA <code>true</code> if the ocr option is selected, <code>false</code> otherwise
139127
*/
@@ -161,8 +149,6 @@ class Parameters {
161149
GBool imageInline;
162150
/** PL: The value of the readingOrder option */
163151
GBool readingOrder;
164-
/** PL: The value of the charReadingOrderAttr option */
165-
GBool charReadingOrderAttr;
166152
/** The value of ocr option */
167153
GBool ocr;
168154
/** the count limit of files */

src/XmlAltoOutputDev.cc

+20-36
Original file line numberDiff line numberDiff line change
@@ -2842,10 +2842,6 @@ void TextPage::addWord(TextRawWord *word) {
28422842

28432843
void TextPage::addAttributTypeReadingOrder(xmlNodePtr node, char *tmp,
28442844
IWord *word) {
2845-
if (parameters->getCharReadingOrderAttr() == gFalse) {
2846-
return;
2847-
}
2848-
28492845
int nbLeft = 0;
28502846
int nbRight = 0;
28512847

@@ -4935,15 +4931,12 @@ void TextPage::dumpInReadingOrder(GBool useBlocks, GBool fullFontName) {
49354931
snprintf(tmp, sizeof(tmp), ATTR_NUMFORMAT, listeImages[i]->getHeightImage());
49364932
xmlNewProp(node, (const xmlChar *) ATTR_HEIGHT, (const xmlChar *) tmp);
49374933

4938-
std::string rotation = std::to_string(listeImages[i]->getRotation());
4939-
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)rotation.c_str());
4940-
//if (listeImages[i]->getRotation() > 0){
4941-
// xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sTRUE);
4942-
//}
4943-
//else{
4944-
// xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sFALSE);
4945-
//}
4946-
4934+
if (listeImages[i]->getRotation() > 0){
4935+
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sTRUE);
4936+
}
4937+
else{
4938+
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sFALSE);
4939+
}
49474940
// if (listeImages[i]->isImageInline()) {
49484941
// xmlNewProp(node, (const xmlChar *) ATTR_INLINE, (const xmlChar *) sTRUE);
49494942
// }
@@ -5792,15 +5785,12 @@ void TextPage::dump(GBool useBlocks, GBool fullFontName) {
57925785
snprintf(tmp, sizeof(tmp), ATTR_NUMFORMAT, listeImages[i]->getHeightImage());
57935786
xmlNewProp(node, (const xmlChar *) ATTR_HEIGHT, (const xmlChar *) tmp);
57945787

5795-
std::string rotation = std::to_string(listeImages[i]->getRotation());
5796-
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)rotation.c_str());
5797-
//if (listeImages[i]->getRotation() > 0){
5798-
// xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sTRUE);
5799-
//}
5800-
//else{
5801-
// xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sFALSE);
5802-
//}
5803-
5788+
if (listeImages[i]->getRotation() > 0){
5789+
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sTRUE);
5790+
}
5791+
else{
5792+
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sFALSE);
5793+
}
58045794
// if (listeImages[i]->isImageInline()) {
58055795
// xmlNewProp(node, (const xmlChar *) ATTR_INLINE, (const xmlChar *) sTRUE);
58065796
// }
@@ -5847,14 +5837,11 @@ void TextPage::dump(GBool useBlocks, GBool fullFontName) {
58475837
snprintf(tmp, sizeof(tmp), ATTR_NUMFORMAT, svg_ymax - svg_ymin);
58485838
xmlNewProp(node, (const xmlChar *) ATTR_HEIGHT, (const xmlChar *) tmp);
58495839

5850-
std::string rotation = std::to_string(r);
5851-
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)rotation.c_str());
5852-
//if (r > 0) {
5853-
// xmlNewProp(node, (const xmlChar *) ATTR_ROTATION, (const xmlChar *) sTRUE);
5854-
//} else {
5855-
// xmlNewProp(node, (const xmlChar *) ATTR_ROTATION, (const xmlChar *) sFALSE);
5856-
//}
5857-
5840+
if (r > 0) {
5841+
xmlNewProp(node, (const xmlChar *) ATTR_ROTATION, (const xmlChar *) sTRUE);
5842+
} else {
5843+
xmlNewProp(node, (const xmlChar *) ATTR_ROTATION, (const xmlChar *) sFALSE);
5844+
}
58585845
// if (listeImages[i]->isImageInline()) {
58595846
// xmlNewProp(node, (const xmlChar *) ATTR_INLINE, (const xmlChar *) sTRUE);
58605847
// }
@@ -7309,10 +7296,8 @@ XmlAltoOutputDev::XmlAltoOutputDev(GString *fileName, GString *fileNamePdf,
73097296
xmlAddChild(nodeOCRProcessingStep, nodeProcessingDate);
73107297
time_t t;
73117298
time(&t);
7312-
char tstamp[sizeof "YYYY-MM-DDTHH:MM:SSZ"];
7313-
strftime(tstamp, sizeof tstamp, "%FT%TZ", gmtime(&t));
73147299
xmlNodeSetContent(nodeProcessingDate, (const xmlChar *) xmlEncodeEntitiesReentrant(
7315-
nodeProcessingDate->doc, (const xmlChar *) tstamp));
7300+
nodeProcessingDate->doc, (const xmlChar *) ctime(&t)));
73167301

73177302
xmlNodePtr nodeProcessingSoftware = xmlNewNode(NULL, (const xmlChar *) TAG_PROCESSINGSOFTWARE);
73187303
nodeProcessingSoftware->type = XML_ELEMENT_NODE;
@@ -7527,7 +7512,7 @@ void XmlAltoOutputDev::addStyles() {
75277512
xmlNewProp(textStyleNode, (const xmlChar *) ATTR_FONTWIDTH, (const xmlChar *) tmp);
75287513

75297514
sprintf(tmp, "%s", fontStyleInfo->getFontColor()->getCString());
7530-
xmlNewProp(textStyleNode, (const xmlChar *) ATTR_FONTCOLOR, (const xmlChar *) (tmp+1));
7515+
xmlNewProp(textStyleNode, (const xmlChar *) ATTR_FONTCOLOR, (const xmlChar *) tmp);
75317516

75327517
delete fontStyleInfo->getFontColor();
75337518

@@ -7556,8 +7541,7 @@ void XmlAltoOutputDev::addStyles() {
75567541
}
75577542

75587543
sprintf(tmp, "%s", fontStyle->getCString());
7559-
if ( strcmp(tmp, "") )
7560-
xmlNewProp(textStyleNode, (const xmlChar *) ATTR_FONTSTYLE, (const xmlChar *) tmp);
7544+
xmlNewProp(textStyleNode, (const xmlChar *) ATTR_FONTSTYLE, (const xmlChar *) tmp);
75617545

75627546
delete fontStyle;
75637547

src/pdfalto.cc

-8
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ static GBool fullFontName = gFalse;
6565
static GBool noImageInline = gFalse;
6666
static GBool annots = gFalse;
6767
static GBool readingOrder = gFalse;
68-
static GBool charReadingOrderAttr = gFalse;
6968
static GBool ocr = gFalse;
7069

7170
static char ownerPassword[33] = "\001";
@@ -99,8 +98,6 @@ static ArgDesc argDesc[] = {
9998
"add blocks informations within the structure"},
10099
{"-readingOrder", argFlag, &readingOrder, 0,
101100
"blocks follow the reading order"},
102-
{"-charReadingOrderAttr", argFlag, &charReadingOrderAttr, 0,
103-
"include TYPE attribute to String elements to indicate right-to-left reading order (not valid ALTO)"},
104101
// {"-ocr", argFlag, &ocr, 0,
105102
// "recognises all characters that are missing from unicode."},
106103
{"-fullFontName", argFlag, &fullFontName, 0,
@@ -218,11 +215,6 @@ int main(int argc, char *argv[]) {
218215
cmd->append("-readingOrder ");
219216
}
220217

221-
if (charReadingOrderAttr) {
222-
parameters->setCharReadingOrderAttr(gTrue);
223-
cmd->append("-charReadingOrderAttr ");
224-
}
225-
226218
if (ocr) {
227219
parameters->setOcr(gTrue);
228220
cmd->append("-ocr ");

0 commit comments

Comments
 (0)