@@ -1885,62 +1885,54 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
1885
1885
if (word->chopped_word == NULL ) return ;
1886
1886
ASSERT_HOST (word->best_choice != NULL );
1887
1887
1888
- inT32 index ; // char id index
1889
- // character iterator
1890
- BLOB_CHOICE_IT choice_it; // choice iterator
1891
1888
int fontinfo_size = get_fontinfo_table ().size ();
1892
- int fontset_size = get_fontset_table (). size () ;
1893
- if (fontinfo_size == 0 || fontset_size == 0 ) return ;
1894
- STATS fonts ( 0 , fontinfo_size); // font counters
1889
+ if (fontinfo_size == 0 ) return ;
1890
+ GenericVector< int > font_total_score ;
1891
+ font_total_score. init_to_size (fontinfo_size, 0 );
1895
1892
1896
1893
word->italic = 0 ;
1897
1894
word->bold = 0 ;
1898
- if (!word->best_choice_fontinfo_ids .empty ()) {
1899
- word->best_choice_fontinfo_ids .clear ();
1895
+ // Compute the font scores for the word
1896
+ if (tessedit_debug_fonts) {
1897
+ tprintf (" Examining fonts in %s\n " ,
1898
+ word->best_choice ->debug_string ().string ());
1899
+ }
1900
+ for (int b = 0 ; b < word->best_choice ->length (); ++b) {
1901
+ BLOB_CHOICE* choice = word->GetBlobChoice (b);
1902
+ if (choice == NULL ) continue ;
1903
+ const GenericVector<ScoredFont>& fonts = choice->fonts ();
1904
+ for (int f = 0 ; f < fonts.size (); ++f) {
1905
+ int fontinfo_id = fonts[f].fontinfo_id ;
1906
+ if (0 <= fontinfo_id && fontinfo_id < fontinfo_size) {
1907
+ font_total_score[fontinfo_id] += fonts[f].score ;
1908
+ }
1909
+ }
1900
1910
}
1901
- // Compute the modal font for the word
1902
- for ( index = 0 ; index < word-> best_choice -> length (); ++ index ) {
1903
- UNICHAR_ID word_ch_id = word-> best_choice -> unichar_id ( index ) ;
1904
- choice_it. set_to_list (word-> GetBlobChoices ( index ));
1905
- if (tessedit_debug_fonts) {
1906
- tprintf (" Examining fonts in %s \n " ,
1907
- word-> best_choice -> debug_string (). string () );
1911
+ // Find the top and 2nd choice for the word.
1912
+ int score1 = 0 , score2 = 0 ;
1913
+ inT16 font_id1 = - 1 , font_id2 = - 1 ;
1914
+ for ( int f = 0 ; f < fontinfo_size; ++f) {
1915
+ if (tessedit_debug_fonts && font_total_score[f] > 0 ) {
1916
+ tprintf (" Font %s, total score = %d \n " ,
1917
+ fontinfo_table_. get (f). name , font_total_score[f] );
1908
1918
}
1909
- for (choice_it.mark_cycle_pt (); !choice_it.cycled_list ();
1910
- choice_it.forward ()) {
1911
- UNICHAR_ID blob_ch_id = choice_it.data ()->unichar_id ();
1912
- if (blob_ch_id == word_ch_id) {
1913
- if (tessedit_debug_fonts) {
1914
- tprintf (" %s font %s (%d) font2 %s (%d)\n " ,
1915
- word->uch_set ->id_to_unichar (blob_ch_id),
1916
- choice_it.data ()->fontinfo_id () < 0 ? " unknown" :
1917
- fontinfo_table_.get (choice_it.data ()->fontinfo_id ()).name ,
1918
- choice_it.data ()->fontinfo_id (),
1919
- choice_it.data ()->fontinfo_id2 () < 0 ? " unknown" :
1920
- fontinfo_table_.get (choice_it.data ()->fontinfo_id2 ()).name ,
1921
- choice_it.data ()->fontinfo_id2 ());
1922
- }
1923
- // 1st choice font gets 2 pts, 2nd choice 1 pt.
1924
- if (choice_it.data ()->fontinfo_id () >= 0 ) {
1925
- fonts.add (choice_it.data ()->fontinfo_id (), 2 );
1926
- }
1927
- if (choice_it.data ()->fontinfo_id2 () >= 0 ) {
1928
- fonts.add (choice_it.data ()->fontinfo_id2 (), 1 );
1929
- }
1930
- break ;
1931
- }
1919
+ if (font_total_score[f] > score1) {
1920
+ score2 = score1;
1921
+ font_id2 = font_id1;
1922
+ score1 = font_total_score[f];
1923
+ font_id1 = f;
1924
+ } else if (font_total_score[f] > score2) {
1925
+ score2 = font_total_score[f];
1926
+ font_id2 = f;
1932
1927
}
1933
1928
}
1934
- inT16 font_id1, font_id2;
1935
- find_modal_font (&fonts, &font_id1, &word->fontinfo_id_count );
1936
- find_modal_font (&fonts, &font_id2, &word->fontinfo_id2_count );
1937
1929
word->fontinfo = font_id1 >= 0 ? &fontinfo_table_.get (font_id1) : NULL ;
1938
1930
word->fontinfo2 = font_id2 >= 0 ? &fontinfo_table_.get (font_id2) : NULL ;
1939
- // All the blobs get the word's best choice font.
1940
- for ( int i = 0 ; i < word-> best_choice -> length (); ++i) {
1941
- word->best_choice_fontinfo_ids . push_back (font_id1 );
1942
- }
1943
- if (word-> fontinfo_id_count > 0 ) {
1931
+ // Each score has a limit of MAX_UINT16, so divide by that to get the number
1932
+ // of "votes" for that font, ie number of perfect scores.
1933
+ word->fontinfo_id_count = ClipToRange (score1 / MAX_UINT16, 1 , MAX_INT8 );
1934
+ word-> fontinfo_id2_count = ClipToRange (score2 / MAX_UINT16, 0 , MAX_INT8);
1935
+ if (score1 > 0 ) {
1944
1936
FontInfo fi = fontinfo_table_.get (font_id1);
1945
1937
if (tessedit_debug_fonts) {
1946
1938
if (word->fontinfo_id2_count > 0 ) {
@@ -1953,9 +1945,8 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
1953
1945
fi.name , word->fontinfo_id_count );
1954
1946
}
1955
1947
}
1956
- // 1st choices got 2 pts, so we need to halve the score for the mode.
1957
- word->italic = (fi.is_italic () ? 1 : -1 ) * (word->fontinfo_id_count + 1 ) / 2 ;
1958
- word->bold = (fi.is_bold () ? 1 : -1 ) * (word->fontinfo_id_count + 1 ) / 2 ;
1948
+ word->italic = (fi.is_italic () ? 1 : -1 ) * word->fontinfo_id_count ;
1949
+ word->bold = (fi.is_bold () ? 1 : -1 ) * word->fontinfo_id_count ;
1959
1950
}
1960
1951
}
1961
1952
@@ -2009,8 +2000,7 @@ void Tesseract::font_recognition_pass(PAGE_RES* page_res) {
2009
2000
word = page_res_it.word ();
2010
2001
int length = word->best_choice ->length ();
2011
2002
2012
- // 1st choices got 2 pts, so we need to halve the score for the mode.
2013
- int count = (word->fontinfo_id_count + 1 ) / 2 ;
2003
+ int count = word->fontinfo_id_count ;
2014
2004
if (!(count == length || (length > 3 && count >= length * 3 / 4 ))) {
2015
2005
word->fontinfo = modal_font;
2016
2006
// Counts only get 1 as it came from the doc.
0 commit comments