Skip to content

Commit 832c6ed

Browse files
committed
Allow saving of box/tiff pairs during base tesseract training
1 parent 17c8ac2 commit 832c6ed

File tree

1 file changed

+24 -12 lines changed

1 file changed

+24 -12 lines changed

src/training/tesstrain_utils.sh

+24-12
Original file line number | Diff line number | Diff line change
@@ -259,7 +259,7 @@ generate_font_image() {
259259
common_args+=" --char_spacing=${CHAR_SPACING} --exposure=${EXPOSURE}"
260260
common_args+=" --outputbase=${outbase} --max_pages=${MAX_PAGES}"
261261
if $DISTORT_IMAGE; then
262-
common_args+=" --distort_image "
262+
common_args+=" --distort_image --invert=false"
263263
fi
264264
265265
# add --writing_mode=vertical-upright to common_args if the font is
@@ -326,6 +326,17 @@ phase_I_generate_image() {
326326
check_file_readable ${outbase}.box ${outbase}.tif
327327
done
328328
done
329+
if $SAVE_BOX_TIFF && ( ! $LINEDATA ) ; then
330+
tlog "\n=== Saving box/tiff pairs for training data ==="
331+
for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do
332+
tlog "Moving ${f} to ${OUTPUT_DIR}"
333+
cp "${f}" "${OUTPUT_DIR}"
334+
done
335+
for f in "${TRAINING_DIR}/${LANG_CODE}".*.tif; do
336+
tlog "Moving ${f} to ${OUTPUT_DIR}"
337+
cp "${f}" "${OUTPUT_DIR}"
338+
done
339+
fi
329340
}
330341
331342
# Phase UP : Generate (U)nicharset and (P)roperties file.
@@ -386,7 +397,7 @@ phase_D_generate_dawg() {
386397
387398
# Punctuation DAWG
388399
# -r arguments to wordlist2dawg denote RTL reverse policy
389-
# (see Trie::RTLReversePolicy enum in tesseract/src/dict/trie.h).
400+
# (see Trie::RTLReversePolicy enum in third_party/tesseract/dict/trie.h).
390401
# We specify 0/RRP_DO_NO_REVERSE when generating number DAWG,
391402
# 1/RRP_REVERSE_IF_HAS_RTL for freq and word DAWGS,
392403
# 2/RRP_FORCE_REVERSE for the punctuation DAWG.
@@ -562,17 +573,18 @@ make__lstmdata() {
562573
--output_dir "${OUTPUT_DIR}" --lang "${LANG_CODE}" \
563574
"${pass_through}" "${lang_is_rtl}"
564575
565-
if $SAVE_BOX_TIFF; then
576+
if $SAVE_BOX_TIFF ; then
566577
tlog "\n=== Saving box/tiff pairs for training data ==="
567-
for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do
568-
tlog "Moving ${f} to ${OUTPUT_DIR}"
569-
mv "${f}" "${OUTPUT_DIR}"
570-
done
571-
for f in "${TRAINING_DIR}/${LANG_CODE}".*.tif; do
572-
tlog "Moving ${f} to ${OUTPUT_DIR}"
573-
mv "${f}" "${OUTPUT_DIR}"
574-
done
575-
fi
578+
for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do
579+
tlog "Moving ${f} to ${OUTPUT_DIR}"
580+
mv "${f}" "${OUTPUT_DIR}"
581+
done
582+
for f in "${TRAINING_DIR}/${LANG_CODE}".*.tif; do
583+
tlog "Moving ${f} to ${OUTPUT_DIR}"
584+
mv "${f}" "${OUTPUT_DIR}"
585+
done
586+
fi
587+
576588
tlog "\n=== Moving lstmf files for training data ==="
577589
for f in "${TRAINING_DIR}/${LANG_CODE}".*.lstmf; do
578590
tlog "Moving ${f} to ${OUTPUT_DIR}"

0 commit comments

Comments (0)