File tree: 3 files changed, +20 -1 lines changed
3 files changed +20
-1
lines changed Original file line number Diff line number Diff line change @@ -114,6 +114,13 @@ bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
114
114
AppendString (utf8);
115
115
delete[] utf8;
116
116
117
+ bool pageBreak = false ;
118
+ api->GetBoolVariable (" include_page_breaks" , &pageBreak);
119
+ const char * pageSeparator = api->GetStringVariable (" page_separator" );
120
+ if (pageBreak) {
121
+ AppendString (pageSeparator);
122
+ }
123
+
117
124
return true ;
118
125
}
119
126
Original file line number Diff line number Diff line change @@ -442,6 +442,12 @@ Tesseract::Tesseract()
442
442
this->params()),
443
443
BOOL_MEMBER(preserve_interword_spaces, false,
444
444
"Preserve multiple interword spaces", this->params()),
445
+ BOOL_MEMBER(include_page_breaks, FALSE,
446
+ "Include page separator string in output text after each "
447
+ "image/page.", this->params()),
448
+ STRING_MEMBER(page_separator, "\f",
449
+ "Page separator (default is form feed control character)",
450
+ this->params()),
445
451
446
452
// The following parameters were deprecated and removed from their original
447
453
// locations. The parameters are temporarily kept here to give Tesseract
Original file line number Diff line number Diff line change @@ -1009,7 +1009,13 @@ class Tesseract : public Wordrec {
1009
1009
double_VAR_H (textord_tabfind_aligned_gap_fraction, 0.75 ,
1010
1010
" Fraction of height used as a minimum gap for aligned blobs." );
1011
1011
INT_VAR_H (tessedit_parallelize, 0 , " Run in parallel where possible" );
1012
- BOOL_VAR_H (preserve_interword_spaces, false , " Preserve multiple interword spaces" );
1012
+ BOOL_VAR_H (preserve_interword_spaces, false ,
1013
+ " Preserve multiple interword spaces" );
1014
+ BOOL_VAR_H (include_page_breaks, false ,
1015
+ " Include page separator string in output text after each "
1016
+ " image/page." );
1017
+ STRING_VAR_H (page_separator, " \f " ,
1018
+ " Page separator (default is form feed control character)" );
1013
1019
1014
1020
// The following parameters were deprecated and removed from their original
1015
1021
// locations. The parameters are temporarily kept here to give Tesseract
You can’t perform that action at this time.
0 commit comments