Skip to content

Commit f6fd9b3

Browse files
tfmorris authored and zdenop committed
Handle null raw_choice - fixes #235, fixes #246
1 parent 90403ef commit f6fd9b3

File tree

2 files changed: +24 -9 lines changed

src/ccmain/control.cpp

+19 -8
Original file line number | Diff line number | Diff line change
@@ -1300,14 +1300,25 @@ float Tesseract::ClassifyBlobAsWord(int pass_n, PAGE_RES_IT* pr_it,
13001300
SetupWordPassN(1, &wd);
13011301
classify_word_and_language(pass_n, &it, &wd);
13021302
if (debug_noise_removal) {
1303-
tprintf("word xheight=%g, row=%g, range=[%g,%g]\n", word_res->x_height,
1304-
wd.row->x_height(), wd.word->raw_choice->min_x_height(),
1305-
wd.word->raw_choice->max_x_height());
1306-
}
1307-
const float cert = wd.word->raw_choice->certainty();
1308-
const float rat = wd.word->raw_choice->rating();
1309-
*c2 = rat > 0.0f ? cert * cert / rat : 0.0f;
1310-
*best_str = wd.word->raw_choice->unichar_string();
1303+
if (wd.word->raw_choice != NULL) {
1304+
tprintf("word xheight=%g, row=%g, range=[%g,%g]\n", word_res->x_height,
1305+
wd.row->x_height(), wd.word->raw_choice->min_x_height(),
1306+
wd.word->raw_choice->max_x_height());
1307+
} else {
1308+
tprintf("Got word with null raw choice xheight=%g, row=%g\n", word_res->x_height,
1309+
wd.row->x_height());
1310+
}
1311+
}
1312+
float cert = 0.0f;
1313+
if (wd.word->raw_choice != NULL) { // This probably shouldn't happen, but...
1314+
cert = wd.word->raw_choice->certainty();
1315+
float rat = wd.word->raw_choice->rating();
1316+
*c2 = rat > 0.0f ? cert * cert / rat : 0.0f;
1317+
*best_str = wd.word->raw_choice->unichar_string();
1318+
} else {
1319+
*c2 = 0.0f;
1320+
*best_str = "";
1321+
}
13111322
it.DeleteCurrentWord();
13121323
pr_it->ResetWordIterator();
13131324
return cert;

src/training/stringrenderer.cpp

+5 -1
Original file line number | Diff line number | Diff line change
@@ -543,7 +543,11 @@ void StringRenderer::ComputeClusterBoxes() {
543543
// pango.
544544
std::vector<std::string> cluster_text;
545545
if (GetClusterStrings(&cluster_text)) {
546-
ASSERT_HOST(cluster_text.size() == start_byte_to_box.size());
546+
tprintf("* %d, cluster_text.size(): %d\t", page_, cluster_text.size());
547+
tprintf("start_byte_to_box.size(): %d\n", start_byte_to_box.size());
548+
if (cluster_text.size() != start_byte_to_box.size())
549+
tprintf(">%s<\n", cluster_text[0].c_str());
550+
ASSERT_HOST(cluster_text.size() == start_byte_to_box.size());
547551
int ind = 0;
548552
for (std::map<int, BoxChar*>::iterator it = start_byte_to_box.begin();
549553
it != start_byte_to_box.end(); ++it, ++ind) {

0 commit comments

Comments
 (0)