Skip to content

Commit 4c7c960

Browse files
committed
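Fix issue 1417: add the include_page_breaks and page_separator parameters so the text renderer can append a page separator after each image/page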
1 parent 09b0c91 commit 4c7c960

File tree

3 files changed: +20 -1 lines changed

api/renderer.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,13 @@ bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
114114
AppendString(utf8);
115115
delete[] utf8;
116116

117+
bool pageBreak = false;
118+
api->GetBoolVariable("include_page_breaks", &pageBreak);
119+
const char* pageSeparator = api->GetStringVariable("page_separator");
120+
if(pageBreak) {
121+
AppendString(pageSeparator);
122+
}
123+
117124
return true;
118125
}
119126

ccmain/tesseractclass.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,12 @@ Tesseract::Tesseract()
442442
this->params()),
443443
BOOL_MEMBER(preserve_interword_spaces, false,
444444
"Preserve multiple interword spaces", this->params()),
445+
BOOL_MEMBER(include_page_breaks, FALSE,
446+
"Include page separator string in output text after each "
447+
"image/page.", this->params()),
448+
STRING_MEMBER(page_separator, "\f",
449+
"Page separator (default is form feed control character)",
450+
this->params()),
445451

446452
// The following parameters were deprecated and removed from their original
447453
// locations. The parameters are temporarily kept here to give Tesseract

ccmain/tesseractclass.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,13 @@ class Tesseract : public Wordrec {
10091009
double_VAR_H(textord_tabfind_aligned_gap_fraction, 0.75,
10101010
"Fraction of height used as a minimum gap for aligned blobs.");
10111011
INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");
1012-
BOOL_VAR_H(preserve_interword_spaces, false, "Preserve multiple interword spaces");
1012+
BOOL_VAR_H(preserve_interword_spaces, false,
1013+
"Preserve multiple interword spaces");
1014+
BOOL_VAR_H(include_page_breaks, false,
1015+
"Include page separator string in output text after each "
1016+
"image/page.");
1017+
STRING_VAR_H(page_separator, "\f",
1018+
"Page separator (default is form feed control character)");
10131019

10141020
// The following parameters were deprecated and removed from their original
10151021
// locations. The parameters are temporarily kept here to give Tesseract

0 commit comments

Comments
 (0)