Allow saving of box/tiff pairs during base tesseract training

Shreeshrii · Shreeshrii · commit 832c6edb971a · 2019-06-14T09:25:54.000Z
diff --git a/src/training/tesstrain_utils.sh b/src/training/tesstrain_utils.sh
@@ -259,7 +259,7 @@ generate_font_image() {
     common_args+=" --char_spacing=${CHAR_SPACING} --exposure=${EXPOSURE}"
     common_args+=" --outputbase=${outbase} --max_pages=${MAX_PAGES}"
     if $DISTORT_IMAGE; then
-        common_args+=" --distort_image "
+        common_args+=" --distort_image --invert=false"
     fi
 
     # add --writing_mode=vertical-upright to common_args if the font is
@@ -326,6 +326,17 @@ phase_I_generate_image() {
             check_file_readable ${outbase}.box ${outbase}.tif
         done
     done
+    if $SAVE_BOX_TIFF && ( ! $LINEDATA ) ; then
+    tlog "\n=== Saving box/tiff pairs for training data ==="
+        for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do
+            tlog "Moving ${f} to ${OUTPUT_DIR}"
+            cp "${f}" "${OUTPUT_DIR}"
+        done
+        for f in "${TRAINING_DIR}/${LANG_CODE}".*.tif; do
+            tlog "Moving ${f} to ${OUTPUT_DIR}"
+            cp "${f}" "${OUTPUT_DIR}"
+        done
+    fi
 }
 
 # Phase UP : Generate (U)nicharset and (P)roperties file.
@@ -386,7 +397,7 @@ phase_D_generate_dawg() {
 
     # Punctuation DAWG
     # -r arguments to wordlist2dawg denote RTL reverse policy
-    # (see Trie::RTLReversePolicy enum in tesseract/src/dict/trie.h).
+    # (see Trie::RTLReversePolicy enum in tesseract/src/dict/trie.h).
     # We specify 0/RRP_DO_NO_REVERSE when generating number DAWG,
     # 1/RRP_REVERSE_IF_HAS_RTL for freq and word DAWGS,
     # 2/RRP_FORCE_REVERSE for the punctuation DAWG.
@@ -562,17 +573,18 @@ make__lstmdata() {
     --output_dir "${OUTPUT_DIR}" --lang "${LANG_CODE}" \
     "${pass_through}" "${lang_is_rtl}"
 
-  if $SAVE_BOX_TIFF; then
+    if $SAVE_BOX_TIFF ; then
     tlog "\n=== Saving box/tiff pairs for training data ==="
-  for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do
-    tlog "Moving ${f} to ${OUTPUT_DIR}"
-    mv "${f}" "${OUTPUT_DIR}"
-  done
-  for f in "${TRAINING_DIR}/${LANG_CODE}".*.tif; do
-    tlog "Moving ${f} to ${OUTPUT_DIR}"
-    mv "${f}" "${OUTPUT_DIR}"
-  done
-  fi
+        for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do
+            tlog "Moving ${f} to ${OUTPUT_DIR}"
+            mv "${f}" "${OUTPUT_DIR}"
+        done
+        for f in "${TRAINING_DIR}/${LANG_CODE}".*.tif; do
+            tlog "Moving ${f} to ${OUTPUT_DIR}"
+            mv "${f}" "${OUTPUT_DIR}"
+        done
+    fi
+
   tlog "\n=== Moving lstmf files for training data ==="
   for f in "${TRAINING_DIR}/${LANG_CODE}".*.lstmf; do
     tlog "Moving ${f} to ${OUTPUT_DIR}"