From b9b2aea4865587cd6bcb2d77cec7385b68a441fd Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Tue, 11 Jun 2024 18:05:22 +0200 Subject: [PATCH 1/3] Fix more code style issues (reported by Codacy) Signed-off-by: Stefan Weil --- INSTALL.GIT.md | 2 -- unittest/README.md | 1 - 2 files changed, 3 deletions(-) diff --git a/INSTALL.GIT.md b/INSTALL.GIT.md index 25f3873666..45acd98445 100644 --- a/INSTALL.GIT.md +++ b/INSTALL.GIT.md @@ -50,7 +50,6 @@ Just run: and follow the instruction on [Viewer Debugging](https://tesseract-ocr.github.io/tessdoc/ViewerDebugging.html). - ## cmake There is alternative build system based on multiplatform [cmake](https://cmake.org/) @@ -61,7 +60,6 @@ There is alternative build system based on multiplatform [cmake](https://cmake.o cd build && cmake .. && make sudo make install - ### WINDOWS See the [documentation](https://tesseract-ocr.github.io/tessdoc/) for more information on this. diff --git a/unittest/README.md b/unittest/README.md index 64a409689f..2d7742993a 100644 --- a/unittest/README.md +++ b/unittest/README.md @@ -75,7 +75,6 @@ * [Lohit-Hindi.ttf](https://raw.githubusercontent.com/pratul/packageofpractices/master/assets/fonts/Lohit-Hindi.ttf) * [UnBatang.ttf](https://raw.githubusercontent.com/byrongibson/fonts/master/backup/truetype.original/unfonts-core/UnBatang.ttf) - ## Run tests To run the tests, do the following in tesseract folder From 305ef95cf09979e4dc69fb6886e64183c3e4bcee Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Tue, 11 Jun 2024 18:41:58 +0200 Subject: [PATCH 2/3] Add double quotes to prevent globbing and word splitting This fixes some issues which were reported by Codacy. Signed-off-by: Stefan Weil --- doc/generate_manpages.sh | 6 +++--- unittest/fuzzers/oss-fuzz-build.sh | 24 ++++++++++++------------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/generate_manpages.sh b/doc/generate_manpages.sh index e6d121d731..9f808d172c 100755 --- a/doc/generate_manpages.sh +++ b/doc/generate_manpages.sh @@ -25,9 +25,9 @@ if [[ -z "${asciidoc}" ]] || [[ -z "${xsltproc}" ]]; then else for src in *.asc; do pagename=${src/.asc/} - (${asciidoc} -d manpage ${src} && - ${asciidoc} -d manpage -b docbook ${src} && - ${xsltproc} --nonet ${man_xslt} ${pagename}.xml) || + (${asciidoc} -d manpage "${src}" && + ${asciidoc} -d manpage -b docbook "${src}" && + ${xsltproc} --nonet ${man_xslt} "${pagename}".xml) || echo "Error generating ${pagename}" done fi diff --git a/unittest/fuzzers/oss-fuzz-build.sh b/unittest/fuzzers/oss-fuzz-build.sh index 2ed20304da..5c7a37bb49 100755 --- a/unittest/fuzzers/oss-fuzz-build.sh +++ b/unittest/fuzzers/oss-fuzz-build.sh @@ -15,22 +15,22 @@ # ################################################################################ -cd $SRC/leptonica +cd "$SRC"/leptonica ./autogen.sh ./configure --disable-shared -make SUBDIRS=src install -j$(nproc) +make SUBDIRS=src install -j"$(nproc)" ldconfig -cd $SRC/tesseract +cd "$SRC"/tesseract ./autogen.sh CXXFLAGS="$CXXFLAGS -D_GLIBCXX_DEBUG" ./configure --disable-graphics --disable-shared -make -j$(nproc) +make -j"$(nproc)" # Get the models which are needed for the fuzzers. -mkdir -p $OUT/tessdata +mkdir -p "$OUT"/tessdata ( -cd $OUT/tessdata +cd "$OUT"/tessdata test -f eng.traineddata || \ curl -L -O https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata ) @@ -44,9 +44,9 @@ LEPTONICA_LIBS=$(pkg-config --static --libs lept) LIBTIFF_LIBS=$(pkg-config --static --libs libtiff-4 | sed 's/ -lm//') $CXX $CXXFLAGS \ - -I $SRC/tesseract/include \ - $SRC/tesseract/unittest/fuzzers/fuzzer-api.cpp -o $OUT/fuzzer-api \ - $SRC/tesseract/.libs/libtesseract.a \ + -I "$SRC"/tesseract/include \ + "$SRC"/tesseract/unittest/fuzzers/fuzzer-api.cpp -o "$OUT"/fuzzer-api \ + "$SRC"/tesseract/.libs/libtesseract.a \ $LEPTONICA_CFLAGS \ -Wl,-Bstatic $LEPTONICA_LIBS $LIBTIFF_LIBS -Wl,-Bdynamic \ $LIB_FUZZING_ENGINE @@ -54,9 +54,9 @@ $CXX $CXXFLAGS \ $CXX $CXXFLAGS \ -DTESSERACT_FUZZER_WIDTH=512 \ -DTESSERACT_FUZZER_HEIGHT=256 \ - -I $SRC/tesseract/include \ - $SRC/tesseract/unittest/fuzzers/fuzzer-api.cpp -o $OUT/fuzzer-api-512x256 \ - $SRC/tesseract/.libs/libtesseract.a \ + -I "$SRC"/tesseract/include \ + "$SRC"/tesseract/unittest/fuzzers/fuzzer-api.cpp -o "$OUT"/fuzzer-api-512x256 \ + "$SRC"/tesseract/.libs/libtesseract.a \ $LEPTONICA_CFLAGS \ -Wl,-Bstatic $LEPTONICA_LIBS $LIBTIFF_LIBS -Wl,-Bdynamic \ $LIB_FUZZING_ENGINE From efd320ba48a9012fc8b443f2ca11231ee31cfdcb Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Tue, 11 Jun 2024 19:00:17 +0200 Subject: [PATCH 3/3] Remove unused variable assignments This fixes some issues which were reported by Codacy. Signed-off-by: Stefan Weil --- src/arch/intsimdmatrixavx2.cpp | 1 - src/ccmain/fixspace.cpp | 3 +-- src/classify/intmatcher.cpp | 12 ++++-------- src/dict/dict.cpp | 2 +- src/textord/tablefind.cpp | 6 ++---- 5 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/arch/intsimdmatrixavx2.cpp b/src/arch/intsimdmatrixavx2.cpp index 5e5a3e04ce..081490851c 100644 --- a/src/arch/intsimdmatrixavx2.cpp +++ b/src/arch/intsimdmatrixavx2.cpp @@ -568,7 +568,6 @@ static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double * output += group_size; } group_size /= 2; - w_step /= 2; if (output + group_size <= rounded_num_out) { PartialMatrixDotVector8(wi, scales, u, rounded_num_in, v); diff --git a/src/ccmain/fixspace.cpp b/src/ccmain/fixspace.cpp index 7f1b166dfc..7f03ff03ea 100644 --- a/src/ccmain/fixspace.cpp +++ b/src/ccmain/fixspace.cpp @@ -267,8 +267,6 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { bool prev_char_1 = false; // prev ch a "1/I/l"? bool prev_char_digit = false; // prev ch 2..9 or 0 const char *punct_chars = "!\"`',.:;"; - bool prev_char_punct = false; - do { // current word WERD_RES *word = word_res_it.data(); @@ -325,6 +323,7 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { /* Add 1 to total score for every joined punctuation regardless of context and rejtn */ if (tessedit_prefer_joined_punct) { + bool prev_char_punct; for (i = 0, offset = 0, prev_char_punct = false; i < word_len; offset += word->best_choice->unichar_lengths()[i++]) { bool current_char_punct = diff --git a/src/classify/intmatcher.cpp b/src/classify/intmatcher.cpp index 98162cdd2e..d32aee57de 100644 --- a/src/classify/intmatcher.cpp +++ b/src/classify/intmatcher.cpp @@ -892,7 +892,6 @@ void IntegerMatcher::DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT uint16_t ProtoNum; uint8_t ProtoWordNum; PROTO_SET_STRUCT *ProtoSet; - uint16_t ActualProtoNum; if (PrintMatchSummaryOn(Debug)) { tprintf("Configuration Mask:\n"); @@ -912,9 +911,8 @@ void IntegerMatcher::DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT if (PrintMatchSummaryOn(Debug)) { tprintf("Proto Mask:\n"); for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) { - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); for (ProtoWordNum = 0; ProtoWordNum < 2; ProtoWordNum++, ProtoMask++) { - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); + uint16_t ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); for (ProtoNum = 0; ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1)) && (ActualProtoNum < ClassTemplate->NumProtos)); ProtoNum++, ActualProtoNum++) { @@ -934,7 +932,7 @@ void IntegerMatcher::DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT tprintf("Proto Evidence:\n"); for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) { ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); + uint16_t ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); for (ProtoNum = 0; ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < ClassTemplate->NumProtos)); ProtoNum++, ActualProtoNum++) { @@ -991,7 +989,6 @@ void IntegerMatcher::DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_ const ScratchEvidence &tables, bool SeparateDebugWindows) { uint16_t ProtoNum; - uint16_t ActualProtoNum; PROTO_SET_STRUCT *ProtoSet; int ProtoSetIndex; @@ -1003,7 +1000,7 @@ void IntegerMatcher::DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_ for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) { ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; - ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET; + uint16_t ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET; for (ProtoNum = 0; ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < ClassTemplate->NumProtos)); ProtoNum++, ActualProtoNum++) { @@ -1076,13 +1073,12 @@ void ScratchEvidence::UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, uint16_t ProtoNum; PROTO_SET_STRUCT *ProtoSet; int NumProtos; - uint16_t ActualProtoNum; NumProtos = ClassTemplate->NumProtos; for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) { ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); + uint16_t ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); for (ProtoNum = 0; ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos)); ProtoNum++, ActualProtoNum++) { int temp = 0; diff --git a/src/dict/dict.cpp b/src/dict/dict.cpp index 8874a55ea6..be3cc1372f 100644 --- a/src/dict/dict.cpp +++ b/src/dict/dict.cpp @@ -886,7 +886,7 @@ bool Dict::valid_punctuation(const WERD_CHOICE &word) { } WERD_CHOICE new_word(word.unicharset()); auto last_index = word.length() - 1; - int new_len = 0; + int new_len; for (unsigned i = 0; i <= last_index; ++i) { UNICHAR_ID unichar_id = (word.unichar_id(i)); if (getUnicharset().get_ispunctuation(unichar_id)) { diff --git a/src/textord/tablefind.cpp b/src/textord/tablefind.cpp index 2ed4e495e7..537bec9cb0 100644 --- a/src/textord/tablefind.cpp +++ b/src/textord/tablefind.cpp @@ -884,8 +884,6 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition *part) const { } // Variables used to compute inter-blob spacing. - int current_x0 = -1; - int current_x1 = -1; int previous_x1 = -1; // Stores the maximum gap detected. int largest_partition_gap_found = -1; @@ -897,8 +895,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition *part) const { for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX *blob = it.data(); - current_x0 = blob->bounding_box().left(); - current_x1 = blob->bounding_box().right(); + int current_x0 = blob->bounding_box().left(); + int current_x1 = blob->bounding_box().right(); if (previous_x1 != -1) { int gap = current_x0 - previous_x1;