Skip to content

Commit c9e85ab

Browse files
committed
Fix wrong font attributes in hOCR output
Instrumented code throws these runtime errors during OCR: "../../src/api/baseapi.cpp:1616:5: runtime error: load of value 128, which is not a valid value for type 'bool'" and "../../src/api/baseapi.cpp:1627:5: runtime error: load of value 128, which is not a valid value for type 'bool'". If there is no font information (typical for Tesseract with an LSTM model), the font attributes were left uninitialized and got random values, resulting in wrong hOCR output. Signed-off-by: Stefan Weil <sw@weilnetz.de>
1 parent 0bdae8f commit c9e85ab

File tree

1 file changed

+35
-19
lines changed

1 file changed

+35
-19
lines changed

src/ccmain/ltrresultiterator.cpp

+35-19
Original file line numberDiff line numberDiff line change
@@ -171,27 +171,43 @@ const char* LTRResultIterator::WordFontAttributes(bool* is_bold,
171171
bool* is_smallcaps,
172172
int* pointsize,
173173
int* font_id) const {
174-
if (it_->word() == nullptr) return nullptr; // Already at the end!
175-
float row_height = it_->row()->row->x_height() +
176-
it_->row()->row->ascenders() - it_->row()->row->descenders();
177-
// Convert from pixels to printers points.
178-
*pointsize = scaled_yres_ > 0
179-
? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
180-
: 0;
181-
if (it_->word()->fontinfo == nullptr) {
174+
const char* result = nullptr;
175+
176+
if (it_->word() == nullptr) {
177+
// Already at the end!
178+
*pointsize = 0;
179+
} else {
180+
float row_height = it_->row()->row->x_height() +
181+
it_->row()->row->ascenders() - it_->row()->row->descenders();
182+
// Convert from pixels to printers points.
183+
*pointsize = scaled_yres_ > 0
184+
? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
185+
: 0;
186+
const FontInfo* font_info = it_->word()->fontinfo;
187+
if (font_info) {
188+
// Font information available.
189+
*font_id = font_info->universal_id;
190+
*is_bold = font_info->is_bold();
191+
*is_italic = font_info->is_italic();
192+
*is_underlined = false; // TODO(rays) fix this!
193+
*is_monospace = font_info->is_fixed_pitch();
194+
*is_serif = font_info->is_serif();
195+
*is_smallcaps = it_->word()->small_caps;
196+
result = font_info->name;
197+
}
198+
}
199+
200+
if (!result) {
201+
*is_bold = false;
202+
*is_italic = false;
203+
*is_underlined = false;
204+
*is_monospace = false;
205+
*is_serif = false;
206+
*is_smallcaps = false;
182207
*font_id = -1;
183-
return nullptr; // No font information.
184208
}
185-
const FontInfo& font_info = *it_->word()->fontinfo;
186-
*font_id = font_info.universal_id;
187-
*is_bold = font_info.is_bold();
188-
*is_italic = font_info.is_italic();
189-
*is_underlined = false; // TODO(rays) fix this!
190-
*is_monospace = font_info.is_fixed_pitch();
191-
*is_serif = font_info.is_serif();
192-
*is_smallcaps = it_->word()->small_caps;
193-
194-
return font_info.name;
209+
210+
return result;
195211
}
196212

197213
// Returns the name of the language used to recognize this word.

0 commit comments

Comments
 (0)