Skip to content

Commit 84920b9

Browse files
committed
Font and classifier output structure cleanup.
Font recognition was poor because a 1st and 2nd choice font were forced at the character level, even though the correct font often only wins on total score at the word level. The full set of fonts and scores is now propagated to the word recognizer, which can decide word-level fonts using the scores instead of simple votes. This change precipitated a cleanup of the output data structures for classifier results: ScoredClass and INT_RESULT_STRUCT are eliminated, a few extra elements go into UnicharRating, and UnicharRating is used wherever possible. That added the extra complexity of "1 - rating" conversions, because the internal classifier scores flip between "0 is good" and "0 is bad" before they are converted to rating and certainty.
1 parent 0e868ef commit 84920b9

19 files changed

+432
-431
lines changed

ccmain/applybox.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
272272
// limited by the ability of the chopper to find suitable chop points,
273273
// and not by the value of the certainties.
274274
BLOB_CHOICE* choice =
275-
new BLOB_CHOICE(0, rating, -rating, -1, -1, 0, 0, 0, 0, BCC_FAKE);
275+
new BLOB_CHOICE(0, rating, -rating, -1, 0.0f, 0.0f, 0.0f, BCC_FAKE);
276276
blob_choices.push_back(choice);
277277
rating -= 0.125f;
278278
}
@@ -291,8 +291,8 @@ void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
291291
left_choice->set_certainty(-rating);
292292
// combine confidence w/ serial #
293293
BLOB_CHOICE* right_choice = new BLOB_CHOICE(++right_chop_index,
294-
rating - 0.125f, -rating,
295-
-1, -1, 0, 0, 0, 0, BCC_FAKE);
294+
rating - 0.125f, -rating, -1,
295+
0.0f, 0.0f, 0.0f, BCC_FAKE);
296296
blob_choices.insert(right_choice, blob_number + 1);
297297
}
298298
}

ccmain/control.cpp

+41-51
Original file line numberDiff line numberDiff line change
@@ -1885,62 +1885,54 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
18851885
if (word->chopped_word == NULL) return;
18861886
ASSERT_HOST(word->best_choice != NULL);
18871887

1888-
inT32 index; // char id index
1889-
// character iterator
1890-
BLOB_CHOICE_IT choice_it; // choice iterator
18911888
int fontinfo_size = get_fontinfo_table().size();
1892-
int fontset_size = get_fontset_table().size();
1893-
if (fontinfo_size == 0 || fontset_size == 0) return;
1894-
STATS fonts(0, fontinfo_size); // font counters
1889+
if (fontinfo_size == 0) return;
1890+
GenericVector<int> font_total_score;
1891+
font_total_score.init_to_size(fontinfo_size, 0);
18951892

18961893
word->italic = 0;
18971894
word->bold = 0;
1898-
if (!word->best_choice_fontinfo_ids.empty()) {
1899-
word->best_choice_fontinfo_ids.clear();
1895+
// Compute the font scores for the word
1896+
if (tessedit_debug_fonts) {
1897+
tprintf("Examining fonts in %s\n",
1898+
word->best_choice->debug_string().string());
1899+
}
1900+
for (int b = 0; b < word->best_choice->length(); ++b) {
1901+
BLOB_CHOICE* choice = word->GetBlobChoice(b);
1902+
if (choice == NULL) continue;
1903+
const GenericVector<ScoredFont>& fonts = choice->fonts();
1904+
for (int f = 0; f < fonts.size(); ++f) {
1905+
int fontinfo_id = fonts[f].fontinfo_id;
1906+
if (0 <= fontinfo_id && fontinfo_id < fontinfo_size) {
1907+
font_total_score[fontinfo_id] += fonts[f].score;
1908+
}
1909+
}
19001910
}
1901-
// Compute the modal font for the word
1902-
for (index = 0; index < word->best_choice->length(); ++index) {
1903-
UNICHAR_ID word_ch_id = word->best_choice->unichar_id(index);
1904-
choice_it.set_to_list(word->GetBlobChoices(index));
1905-
if (tessedit_debug_fonts) {
1906-
tprintf("Examining fonts in %s\n",
1907-
word->best_choice->debug_string().string());
1911+
// Find the top and 2nd choice for the word.
1912+
int score1 = 0, score2 = 0;
1913+
inT16 font_id1 = -1, font_id2 = -1;
1914+
for (int f = 0; f < fontinfo_size; ++f) {
1915+
if (tessedit_debug_fonts && font_total_score[f] > 0) {
1916+
tprintf("Font %s, total score = %d\n",
1917+
fontinfo_table_.get(f).name, font_total_score[f]);
19081918
}
1909-
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
1910-
choice_it.forward()) {
1911-
UNICHAR_ID blob_ch_id = choice_it.data()->unichar_id();
1912-
if (blob_ch_id == word_ch_id) {
1913-
if (tessedit_debug_fonts) {
1914-
tprintf("%s font %s (%d) font2 %s (%d)\n",
1915-
word->uch_set->id_to_unichar(blob_ch_id),
1916-
choice_it.data()->fontinfo_id() < 0 ? "unknown" :
1917-
fontinfo_table_.get(choice_it.data()->fontinfo_id()).name,
1918-
choice_it.data()->fontinfo_id(),
1919-
choice_it.data()->fontinfo_id2() < 0 ? "unknown" :
1920-
fontinfo_table_.get(choice_it.data()->fontinfo_id2()).name,
1921-
choice_it.data()->fontinfo_id2());
1922-
}
1923-
// 1st choice font gets 2 pts, 2nd choice 1 pt.
1924-
if (choice_it.data()->fontinfo_id() >= 0) {
1925-
fonts.add(choice_it.data()->fontinfo_id(), 2);
1926-
}
1927-
if (choice_it.data()->fontinfo_id2() >= 0) {
1928-
fonts.add(choice_it.data()->fontinfo_id2(), 1);
1929-
}
1930-
break;
1931-
}
1919+
if (font_total_score[f] > score1) {
1920+
score2 = score1;
1921+
font_id2 = font_id1;
1922+
score1 = font_total_score[f];
1923+
font_id1 = f;
1924+
} else if (font_total_score[f] > score2) {
1925+
score2 = font_total_score[f];
1926+
font_id2 = f;
19321927
}
19331928
}
1934-
inT16 font_id1, font_id2;
1935-
find_modal_font(&fonts, &font_id1, &word->fontinfo_id_count);
1936-
find_modal_font(&fonts, &font_id2, &word->fontinfo_id2_count);
19371929
word->fontinfo = font_id1 >= 0 ? &fontinfo_table_.get(font_id1) : NULL;
19381930
word->fontinfo2 = font_id2 >= 0 ? &fontinfo_table_.get(font_id2) : NULL;
1939-
// All the blobs get the word's best choice font.
1940-
for (int i = 0; i < word->best_choice->length(); ++i) {
1941-
word->best_choice_fontinfo_ids.push_back(font_id1);
1942-
}
1943-
if (word->fontinfo_id_count > 0) {
1931+
// Each score has a limit of MAX_UINT16, so divide by that to get the number
1932+
// of "votes" for that font, ie number of perfect scores.
1933+
word->fontinfo_id_count = ClipToRange(score1 / MAX_UINT16, 1, MAX_INT8);
1934+
word->fontinfo_id2_count = ClipToRange(score2 / MAX_UINT16, 0, MAX_INT8);
1935+
if (score1 > 0) {
19441936
FontInfo fi = fontinfo_table_.get(font_id1);
19451937
if (tessedit_debug_fonts) {
19461938
if (word->fontinfo_id2_count > 0) {
@@ -1953,9 +1945,8 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
19531945
fi.name, word->fontinfo_id_count);
19541946
}
19551947
}
1956-
// 1st choices got 2 pts, so we need to halve the score for the mode.
1957-
word->italic = (fi.is_italic() ? 1 : -1) * (word->fontinfo_id_count + 1) / 2;
1958-
word->bold = (fi.is_bold() ? 1 : -1) * (word->fontinfo_id_count + 1) / 2;
1948+
word->italic = (fi.is_italic() ? 1 : -1) * word->fontinfo_id_count;
1949+
word->bold = (fi.is_bold() ? 1 : -1) * word->fontinfo_id_count;
19591950
}
19601951
}
19611952

@@ -2009,8 +2000,7 @@ void Tesseract::font_recognition_pass(PAGE_RES* page_res) {
20092000
word = page_res_it.word();
20102001
int length = word->best_choice->length();
20112002

2012-
// 1st choices got 2 pts, so we need to halve the score for the mode.
2013-
int count = (word->fontinfo_id_count + 1) / 2;
2003+
int count = word->fontinfo_id_count;
20142004
if (!(count == length || (length > 3 && count >= length * 3 / 4))) {
20152005
word->fontinfo = modal_font;
20162006
// Counts only get 1 as it came from the doc.

ccmain/cube_control.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
384384
UNICHAR_ID uch_id =
385385
cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
386386
choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
387-
-1, -1, 0, 0, 0, 0, BCC_STATIC_CLASSIFIER);
387+
-1, 0.0f, 0.0f, 0.0f, BCC_STATIC_CLASSIFIER);
388388
}
389389
word->FakeClassifyWord(num_chars, choices);
390390
// within a word, cube recognizes the word in reading order.

ccstruct/fontinfo.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -59,23 +59,23 @@ bool FontInfoTable::DeSerialize(bool swap, FILE* fp) {
5959
// Returns true if the given set of fonts includes one with the same
6060
// properties as font_id.
6161
bool FontInfoTable::SetContainsFontProperties(
62-
int font_id, const GenericVector<int>& font_set) const {
62+
int font_id, const GenericVector<ScoredFont>& font_set) const {
6363
uinT32 properties = get(font_id).properties;
6464
for (int f = 0; f < font_set.size(); ++f) {
65-
if (get(font_set[f]).properties == properties)
65+
if (get(font_set[f].fontinfo_id).properties == properties)
6666
return true;
6767
}
6868
return false;
6969
}
7070

7171
// Returns true if the given set of fonts includes multiple properties.
7272
bool FontInfoTable::SetContainsMultipleFontProperties(
73-
const GenericVector<int>& font_set) const {
73+
const GenericVector<ScoredFont>& font_set) const {
7474
if (font_set.empty()) return false;
75-
int first_font = font_set[0];
75+
int first_font = font_set[0].fontinfo_id;
7676
uinT32 properties = get(first_font).properties;
7777
for (int f = 1; f < font_set.size(); ++f) {
78-
if (get(font_set[f]).properties != properties)
78+
if (get(font_set[f].fontinfo_id).properties != properties)
7979
return true;
8080
}
8181
return false;

ccstruct/fontinfo.h

+19-3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,22 @@ namespace tesseract {
3131

3232
class BitVector;
3333

34+
// Simple struct to hold a font and a score. The scores come from the low-level
35+
// integer matcher, so they are in the uinT16 range. Fonts are an index to
36+
// fontinfo_table.
37+
// These get copied around a lot, so best to keep them small.
38+
struct ScoredFont {
39+
ScoredFont() : fontinfo_id(-1), score(0) {}
40+
ScoredFont(int font_id, uinT16 classifier_score)
41+
: fontinfo_id(font_id), score(classifier_score) {}
42+
43+
// Index into fontinfo table, but inside the classifier, may be a shapetable
44+
// index.
45+
inT32 fontinfo_id;
46+
// Raw score from the low-level classifier.
47+
uinT16 score;
48+
};
49+
3450
// Struct for information about spacing between characters in a particular font.
3551
struct FontSpacingInfo {
3652
inT16 x_gap_before;
@@ -140,11 +156,11 @@ class FontInfoTable : public GenericVector<FontInfo> {
140156

141157
// Returns true if the given set of fonts includes one with the same
142158
// properties as font_id.
143-
bool SetContainsFontProperties(int font_id,
144-
const GenericVector<int>& font_set) const;
159+
bool SetContainsFontProperties(
160+
int font_id, const GenericVector<ScoredFont>& font_set) const;
145161
// Returns true if the given set of fonts includes multiple properties.
146162
bool SetContainsMultipleFontProperties(
147-
const GenericVector<int>& font_set) const;
163+
const GenericVector<ScoredFont>& font_set) const;
148164

149165
// Moves any non-empty FontSpacingInfo entries from other to this.
150166
void MoveSpacingInfoFrom(FontInfoTable* other);

ccstruct/pageres.cpp

+1-4
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) {
148148
add_next_word = false;
149149
}
150150
}
151+
next_word->set_flag(W_FUZZY_NON, add_next_word);
151152
} else {
152153
add_next_word = next_word->flag(W_FUZZY_NON);
153154
}
@@ -206,12 +207,8 @@ WERD_RES& WERD_RES::operator=(const WERD_RES & source) {
206207
if (!wc_dest_it.empty()) {
207208
wc_dest_it.move_to_first();
208209
best_choice = wc_dest_it.data();
209-
best_choice_fontinfo_ids = source.best_choice_fontinfo_ids;
210210
} else {
211211
best_choice = NULL;
212-
if (!best_choice_fontinfo_ids.empty()) {
213-
best_choice_fontinfo_ids.clear();
214-
}
215212
}
216213

217214
if (source.raw_choice != NULL) {

ccstruct/pageres.h

-2
Original file line numberDiff line numberDiff line change
@@ -315,8 +315,6 @@ class WERD_RES : public ELIST_LINK {
315315
BOOL8 combination; //of two fuzzy gap wds
316316
BOOL8 part_of_combo; //part of a combo
317317
BOOL8 reject_spaces; //Reject spacing?
318-
// FontInfo ids for each unichar in best_choice.
319-
GenericVector<inT8> best_choice_fontinfo_ids;
320318

321319
WERD_RES() {
322320
InitNonPointers();

ccstruct/ratngs.cpp

+3-4
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,6 @@ static const char * const kPermuterTypeNames[] = {
9090
BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
9191
float src_rating, // rating
9292
float src_cert, // certainty
93-
inT16 src_fontinfo_id, // font
94-
inT16 src_fontinfo_id2, // 2nd choice font
9593
int src_script_id, // script
9694
float min_xheight, // min xheight allowed
9795
float max_xheight, // max xheight by this char
@@ -100,8 +98,8 @@ BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
10098
unichar_id_ = src_unichar_id;
10199
rating_ = src_rating;
102100
certainty_ = src_cert;
103-
fontinfo_id_ = src_fontinfo_id;
104-
fontinfo_id2_ = src_fontinfo_id2;
101+
fontinfo_id_ = -1;
102+
fontinfo_id2_ = -1;
105103
script_id_ = src_script_id;
106104
min_xheight_ = min_xheight;
107105
max_xheight_ = max_xheight;
@@ -126,6 +124,7 @@ BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) {
126124
max_xheight_ = other.max_xheight_;
127125
yshift_ = other.yshift();
128126
classifier_ = other.classifier_;
127+
fonts_ = other.fonts_;
129128
}
130129

131130
// Returns true if *this and other agree on the baseline and x-height

ccstruct/ratngs.h

+23-8
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#include "clst.h"
2626
#include "elst.h"
27+
#include "fontinfo.h"
2728
#include "genericvector.h"
2829
#include "matrix.h"
2930
#include "unichar.h"
@@ -64,8 +65,6 @@ class BLOB_CHOICE: public ELIST_LINK
6465
BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
6566
float src_rating, // rating
6667
float src_cert, // certainty
67-
inT16 src_fontinfo_id, // font
68-
inT16 src_fontinfo_id2, // 2nd choice font
6968
int script_id, // script
7069
float min_xheight, // min xheight in image pixel units
7170
float max_xheight, // max xheight allowed by this char
@@ -89,6 +88,26 @@ class BLOB_CHOICE: public ELIST_LINK
8988
inT16 fontinfo_id2() const {
9089
return fontinfo_id2_;
9190
}
91+
const GenericVector<tesseract::ScoredFont>& fonts() const {
92+
return fonts_;
93+
}
94+
void set_fonts(const GenericVector<tesseract::ScoredFont>& fonts) {
95+
fonts_ = fonts;
96+
int score1 = 0, score2 = 0;
97+
fontinfo_id_ = -1;
98+
fontinfo_id2_ = -1;
99+
for (int f = 0; f < fonts_.size(); ++f) {
100+
if (fonts_[f].score > score1) {
101+
score2 = score1;
102+
fontinfo_id2_ = fontinfo_id_;
103+
score1 = fonts_[f].score;
104+
fontinfo_id_ = fonts_[f].fontinfo_id;
105+
} else if (fonts_[f].score > score2) {
106+
score2 = fonts_[f].score;
107+
fontinfo_id2_ = fonts_[f].fontinfo_id;
108+
}
109+
}
110+
}
92111
int script_id() const {
93112
return script_id_;
94113
}
@@ -131,12 +150,6 @@ class BLOB_CHOICE: public ELIST_LINK
131150
void set_certainty(float newrat) {
132151
certainty_ = newrat;
133152
}
134-
void set_fontinfo_id(inT16 newfont) {
135-
fontinfo_id_ = newfont;
136-
}
137-
void set_fontinfo_id2(inT16 newfont) {
138-
fontinfo_id2_ = newfont;
139-
}
140153
void set_script(int newscript_id) {
141154
script_id_ = newscript_id;
142155
}
@@ -186,6 +199,8 @@ class BLOB_CHOICE: public ELIST_LINK
186199

187200
private:
188201
UNICHAR_ID unichar_id_; // unichar id
202+
// Fonts and scores. Allowed to be empty.
203+
GenericVector<tesseract::ScoredFont> fonts_;
189204
inT16 fontinfo_id_; // char font information
190205
inT16 fontinfo_id2_; // 2nd choice font information
191206
// Rating is the classifier distance weighted by the length of the outline

ccutil/genericvector.h

+8-4
Original file line numberDiff line numberDiff line change
@@ -445,8 +445,10 @@ class PointerVector : public GenericVector<T*> {
445445
}
446446

447447
PointerVector<T>& operator=(const PointerVector& other) {
448-
this->truncate(0);
449-
this->operator+=(other);
448+
if (&other != this) {
449+
this->truncate(0);
450+
this->operator+=(other);
451+
}
450452
return *this;
451453
}
452454

@@ -777,8 +779,10 @@ GenericVector<T> &GenericVector<T>::operator+=(const GenericVector& other) {
777779

778780
template <typename T>
779781
GenericVector<T> &GenericVector<T>::operator=(const GenericVector& other) {
780-
this->truncate(0);
781-
this->operator+=(other);
782+
if (&other != this) {
783+
this->truncate(0);
784+
this->operator+=(other);
785+
}
782786
return *this;
783787
}
784788

0 commit comments

Comments
 (0)