From b9b2aea4865587cd6bcb2d77cec7385b68a441fd Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Tue, 11 Jun 2024 18:05:22 +0200
Subject: [PATCH 1/3] Fix more code style issues (reported by Codacy)

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 INSTALL.GIT.md     | 2 --
 unittest/README.md | 1 -
 2 files changed, 3 deletions(-)

diff --git a/INSTALL.GIT.md b/INSTALL.GIT.md
index 25f3873666..45acd98445 100644
--- a/INSTALL.GIT.md
+++ b/INSTALL.GIT.md
@@ -50,7 +50,6 @@ Just run:
 
 and follow the instruction on [Viewer Debugging](https://tesseract-ocr.github.io/tessdoc/ViewerDebugging.html).
 
-
 ## cmake
 
 There is alternative build system based on multiplatform [cmake](https://cmake.org/)
@@ -61,7 +60,6 @@ There is alternative build system based on multiplatform [cmake](https://cmake.o
     cd build && cmake .. && make
     sudo make install
 
-
 ### WINDOWS
 
 See the [documentation](https://tesseract-ocr.github.io/tessdoc/) for more information on this.
diff --git a/unittest/README.md b/unittest/README.md
index 64a409689f..2d7742993a 100644
--- a/unittest/README.md
+++ b/unittest/README.md
@@ -75,7 +75,6 @@
 * [Lohit-Hindi.ttf](https://raw.githubusercontent.com/pratul/packageofpractices/master/assets/fonts/Lohit-Hindi.ttf)
 * [UnBatang.ttf](https://raw.githubusercontent.com/byrongibson/fonts/master/backup/truetype.original/unfonts-core/UnBatang.ttf)
 
-
 ## Run tests
 
 To run the tests, do the following in tesseract folder

From 305ef95cf09979e4dc69fb6886e64183c3e4bcee Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Tue, 11 Jun 2024 18:41:58 +0200
Subject: [PATCH 2/3] Add double quotes to prevent globbing and word splitting

This fixes some issues which were reported by Codacy.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 doc/generate_manpages.sh           |  6 +++---
 unittest/fuzzers/oss-fuzz-build.sh | 24 ++++++++++++------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/doc/generate_manpages.sh b/doc/generate_manpages.sh
index e6d121d731..9f808d172c 100755
--- a/doc/generate_manpages.sh
+++ b/doc/generate_manpages.sh
@@ -25,9 +25,9 @@ if [[ -z "${asciidoc}" ]] || [[ -z "${xsltproc}" ]]; then
 else
   for src in *.asc; do
     pagename=${src/.asc/}
-    (${asciidoc} -d manpage ${src} &&
-     ${asciidoc} -d manpage -b docbook ${src} &&
-       ${xsltproc} --nonet ${man_xslt} ${pagename}.xml) ||
+    (${asciidoc} -d manpage "${src}" &&
+     ${asciidoc} -d manpage -b docbook "${src}" &&
+       ${xsltproc} --nonet ${man_xslt} "${pagename}".xml) ||
        echo "Error generating ${pagename}"
   done
 fi
diff --git a/unittest/fuzzers/oss-fuzz-build.sh b/unittest/fuzzers/oss-fuzz-build.sh
index 2ed20304da..5c7a37bb49 100755
--- a/unittest/fuzzers/oss-fuzz-build.sh
+++ b/unittest/fuzzers/oss-fuzz-build.sh
@@ -15,22 +15,22 @@
 #
 ################################################################################
 
-cd $SRC/leptonica
+cd "$SRC"/leptonica
 ./autogen.sh
 ./configure --disable-shared
-make SUBDIRS=src install -j$(nproc)
+make SUBDIRS=src install -j"$(nproc)"
 ldconfig
 
-cd $SRC/tesseract
+cd "$SRC"/tesseract
 ./autogen.sh
 CXXFLAGS="$CXXFLAGS -D_GLIBCXX_DEBUG" ./configure --disable-graphics --disable-shared
-make -j$(nproc)
+make -j"$(nproc)"
 
 # Get the models which are needed for the fuzzers.
 
-mkdir -p $OUT/tessdata
+mkdir -p "$OUT"/tessdata
 (
-cd $OUT/tessdata
+cd "$OUT"/tessdata
 test -f eng.traineddata || \
   curl -L -O https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata
 )
@@ -44,9 +44,9 @@ LEPTONICA_LIBS=$(pkg-config --static --libs lept)
 LIBTIFF_LIBS=$(pkg-config --static --libs libtiff-4 | sed 's/ -lm//')
 
 $CXX $CXXFLAGS \
-    -I $SRC/tesseract/include \
-     $SRC/tesseract/unittest/fuzzers/fuzzer-api.cpp -o $OUT/fuzzer-api \
-     $SRC/tesseract/.libs/libtesseract.a \
+    -I "$SRC"/tesseract/include \
+     "$SRC"/tesseract/unittest/fuzzers/fuzzer-api.cpp -o "$OUT"/fuzzer-api \
+     "$SRC"/tesseract/.libs/libtesseract.a \
      $LEPTONICA_CFLAGS \
      -Wl,-Bstatic $LEPTONICA_LIBS $LIBTIFF_LIBS -Wl,-Bdynamic \
      $LIB_FUZZING_ENGINE
@@ -54,9 +54,9 @@ $CXX $CXXFLAGS \
 $CXX $CXXFLAGS \
     -DTESSERACT_FUZZER_WIDTH=512 \
     -DTESSERACT_FUZZER_HEIGHT=256 \
-    -I $SRC/tesseract/include \
-     $SRC/tesseract/unittest/fuzzers/fuzzer-api.cpp -o $OUT/fuzzer-api-512x256 \
-     $SRC/tesseract/.libs/libtesseract.a \
+    -I "$SRC"/tesseract/include \
+     "$SRC"/tesseract/unittest/fuzzers/fuzzer-api.cpp -o "$OUT"/fuzzer-api-512x256 \
+     "$SRC"/tesseract/.libs/libtesseract.a \
      $LEPTONICA_CFLAGS \
      -Wl,-Bstatic $LEPTONICA_LIBS $LIBTIFF_LIBS -Wl,-Bdynamic \
      $LIB_FUZZING_ENGINE

From efd320ba48a9012fc8b443f2ca11231ee31cfdcb Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Tue, 11 Jun 2024 19:00:17 +0200
Subject: [PATCH 3/3] Remove unused variable assignments

This fixes some issues which were reported by Codacy.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 src/arch/intsimdmatrixavx2.cpp |  1 -
 src/ccmain/fixspace.cpp        |  3 +--
 src/classify/intmatcher.cpp    | 12 ++++--------
 src/dict/dict.cpp              |  2 +-
 src/textord/tablefind.cpp      |  6 ++----
 5 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/src/arch/intsimdmatrixavx2.cpp b/src/arch/intsimdmatrixavx2.cpp
index 5e5a3e04ce..081490851c 100644
--- a/src/arch/intsimdmatrixavx2.cpp
+++ b/src/arch/intsimdmatrixavx2.cpp
@@ -568,7 +568,6 @@ static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *
     output += group_size;
   }
   group_size /= 2;
-  w_step /= 2;
 
   if (output + group_size <= rounded_num_out) {
     PartialMatrixDotVector8(wi, scales, u, rounded_num_in, v);
diff --git a/src/ccmain/fixspace.cpp b/src/ccmain/fixspace.cpp
index 7f1b166dfc..7f03ff03ea 100644
--- a/src/ccmain/fixspace.cpp
+++ b/src/ccmain/fixspace.cpp
@@ -267,8 +267,6 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
   bool prev_char_1 = false;     // prev ch a "1/I/l"?
   bool prev_char_digit = false; // prev ch 2..9 or 0
   const char *punct_chars = "!\"`',.:;";
-  bool prev_char_punct = false;
-
   do {
     // current word
     WERD_RES *word = word_res_it.data();
@@ -325,6 +323,7 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
       /* Add 1 to total score for every joined punctuation regardless of context
   and rejtn */
       if (tessedit_prefer_joined_punct) {
+        bool prev_char_punct;
         for (i = 0, offset = 0, prev_char_punct = false; i < word_len;
              offset += word->best_choice->unichar_lengths()[i++]) {
           bool current_char_punct =
diff --git a/src/classify/intmatcher.cpp b/src/classify/intmatcher.cpp
index 98162cdd2e..d32aee57de 100644
--- a/src/classify/intmatcher.cpp
+++ b/src/classify/intmatcher.cpp
@@ -892,7 +892,6 @@ void IntegerMatcher::DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT
   uint16_t ProtoNum;
   uint8_t ProtoWordNum;
   PROTO_SET_STRUCT *ProtoSet;
-  uint16_t ActualProtoNum;
 
   if (PrintMatchSummaryOn(Debug)) {
     tprintf("Configuration Mask:\n");
@@ -912,9 +911,8 @@ void IntegerMatcher::DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT
   if (PrintMatchSummaryOn(Debug)) {
     tprintf("Proto Mask:\n");
     for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
-      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
       for (ProtoWordNum = 0; ProtoWordNum < 2; ProtoWordNum++, ProtoMask++) {
-        ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
+        uint16_t ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
         for (ProtoNum = 0; ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1)) &&
                             (ActualProtoNum < ClassTemplate->NumProtos));
              ProtoNum++, ActualProtoNum++) {
@@ -934,7 +932,7 @@ void IntegerMatcher::DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT
     tprintf("Proto Evidence:\n");
     for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
       ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
-      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
+      uint16_t ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
       for (ProtoNum = 0;
            ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < ClassTemplate->NumProtos));
            ProtoNum++, ActualProtoNum++) {
@@ -991,7 +989,6 @@ void IntegerMatcher::DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_
                                            const ScratchEvidence &tables,
                                            bool SeparateDebugWindows) {
   uint16_t ProtoNum;
-  uint16_t ActualProtoNum;
   PROTO_SET_STRUCT *ProtoSet;
   int ProtoSetIndex;
 
@@ -1003,7 +1000,7 @@ void IntegerMatcher::DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_
 
   for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
     ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
-    ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
+    uint16_t ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
     for (ProtoNum = 0;
          ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < ClassTemplate->NumProtos));
          ProtoNum++, ActualProtoNum++) {
@@ -1076,13 +1073,12 @@ void ScratchEvidence::UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate,
   uint16_t ProtoNum;
   PROTO_SET_STRUCT *ProtoSet;
   int NumProtos;
-  uint16_t ActualProtoNum;
 
   NumProtos = ClassTemplate->NumProtos;
 
   for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
     ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
-    ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
+    uint16_t ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
     for (ProtoNum = 0; ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
          ProtoNum++, ActualProtoNum++) {
       int temp = 0;
diff --git a/src/dict/dict.cpp b/src/dict/dict.cpp
index 8874a55ea6..be3cc1372f 100644
--- a/src/dict/dict.cpp
+++ b/src/dict/dict.cpp
@@ -886,7 +886,7 @@ bool Dict::valid_punctuation(const WERD_CHOICE &word) {
   }
   WERD_CHOICE new_word(word.unicharset());
   auto last_index = word.length() - 1;
-  int new_len = 0;
+  int new_len;
   for (unsigned i = 0; i <= last_index; ++i) {
     UNICHAR_ID unichar_id = (word.unichar_id(i));
     if (getUnicharset().get_ispunctuation(unichar_id)) {
diff --git a/src/textord/tablefind.cpp b/src/textord/tablefind.cpp
index 2ed4e495e7..537bec9cb0 100644
--- a/src/textord/tablefind.cpp
+++ b/src/textord/tablefind.cpp
@@ -884,8 +884,6 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition *part) const {
   }
 
   // Variables used to compute inter-blob spacing.
-  int current_x0 = -1;
-  int current_x1 = -1;
   int previous_x1 = -1;
   // Stores the maximum gap detected.
   int largest_partition_gap_found = -1;
@@ -897,8 +895,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition *part) const {
 
   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
     BLOBNBOX *blob = it.data();
-    current_x0 = blob->bounding_box().left();
-    current_x1 = blob->bounding_box().right();
+    int current_x0 = blob->bounding_box().left();
+    int current_x1 = blob->bounding_box().right();
     if (previous_x1 != -1) {
       int gap = current_x0 - previous_x1;