@@ -51,6 +51,12 @@ STRING_PARAM_FLAG(fontconfig_tmpdir, "/tmp",
51
51
BOOL_PARAM_FLAG (fontconfig_refresh_cache, false ,
52
52
" Does a one-time deletion of cache files from the "
53
53
" fontconfig_tmpdir before initializing fontconfig." );
54
+ BOOL_PARAM_FLAG (fontconfig_refresh_config_file, true ,
55
+ " Does a one-time reset of the fontconfig config file to point"
56
+ " to fonts_dir before initializing fontconfig. Set to true"
57
+ " if fontconfig_refresh_cache is true. Set it to false to use"
58
+ " multiple instances in separate processes without having to"
59
+ " rescan the fonts_dir, using a previously setup font cache" );
54
60
55
61
#ifndef USE_STD_NAMESPACE
56
62
#include " ocr/trainingdata/typesetting/legacy_fonts.h"
@@ -67,6 +73,8 @@ namespace tesseract {
67
73
// in pixels.
68
74
const int kDefaultResolution = 300 ;
69
75
76
+ bool PangoFontInfo::fontconfig_initialized_ = false ;
77
+
70
78
// Default constructor: begins with no Pango font description and the default
// rendering resolution (kDefaultResolution), then calls Clear().
PangoFontInfo::PangoFontInfo()
    : desc_(NULL),
      resolution_(kDefaultResolution) {
  Clear();
}
@@ -103,34 +111,35 @@ string PangoFontInfo::DescriptionName() const {
103
111
104
112
// Initializes Fontconfig for use by writing a fake fonts.conf file into the
105
113
// FLAGS_fontconfig_tmpdir directory, that points to the supplied
106
- // FLAGS_fonts_dir , and then overrides the FONTCONFIG_PATH environment variable
107
- // to point to this fonts.conf file.
108
- static void InitFontconfig () {
109
- static bool init_fontconfig = false ;
110
- if (init_fontconfig || FLAGS_fonts_dir .empty ()) {
111
- init_fontconfig = true ;
114
+ // fonts_dir , and then overrides the FONTCONFIG_PATH environment variable
115
+ // to point to this fonts.conf file. If force_clear, the cache is refreshed
116
+ // even if it has already been initialized.
117
+ void PangoFontInfo::InitFontConfig ( bool force_clear, const string& fonts_dir) {
118
+ if ((fontconfig_initialized_ && !force_clear) || fonts_dir .empty ()) {
119
+ fontconfig_initialized_ = true ;
112
120
return ;
113
121
}
114
- if (FLAGS_fontconfig_refresh_cache) {
115
- tprintf (" Deleting cache files from %s\n " , FLAGS_fontconfig_tmpdir.c_str ());
122
+ if (FLAGS_fontconfig_refresh_cache || force_clear) {
116
123
File::DeleteMatchingFiles (File::JoinPath (
117
- FLAGS_fontconfig_tmpdir.c_str (), " *cache-2" ).c_str ());
118
- }
119
- tprintf (" Initializing fontconfig\n " );
120
- const int MAX_FONTCONF_FILESIZE = 1024 ;
121
- char fonts_conf_template[MAX_FONTCONF_FILESIZE];
122
- snprintf (fonts_conf_template, MAX_FONTCONF_FILESIZE,
123
- " <?xml version=\" 1.0\" ?>\n "
124
- " <!DOCTYPE fontconfig SYSTEM \" fonts.dtd\" >\n "
125
- " <fontconfig>\n "
126
- " <dir>%s</dir>\n "
127
- " <cachedir>%s</cachedir>\n "
128
- " <config></config>\n "
129
- " </fontconfig>" , FLAGS_fonts_dir.c_str (),
130
- FLAGS_fontconfig_tmpdir.c_str ());
131
- string fonts_conf_file = File::JoinPath (FLAGS_fontconfig_tmpdir.c_str (),
132
- " fonts.conf" );
133
- File::WriteStringToFileOrDie (fonts_conf_template, fonts_conf_file);
124
+ FLAGS_fontconfig_tmpdir.c_str (), " *cache-?" ).c_str ());
125
+ }
126
+ if (FLAGS_fontconfig_refresh_config_file || FLAGS_fontconfig_refresh_cache ||
127
+ force_clear) {
128
+ const int MAX_FONTCONF_FILESIZE = 1024 ;
129
+ char fonts_conf_template[MAX_FONTCONF_FILESIZE];
130
+ snprintf (fonts_conf_template, MAX_FONTCONF_FILESIZE,
131
+ " <?xml version=\" 1.0\" ?>\n "
132
+ " <!DOCTYPE fontconfig SYSTEM \" fonts.dtd\" >\n "
133
+ " <fontconfig>\n "
134
+ " <dir>%s</dir>\n "
135
+ " <cachedir>%s</cachedir>\n "
136
+ " <config></config>\n "
137
+ " </fontconfig>" , fonts_dir.c_str (),
138
+ FLAGS_fontconfig_tmpdir.c_str ());
139
+ string fonts_conf_file = File::JoinPath (FLAGS_fontconfig_tmpdir.c_str (),
140
+ " fonts.conf" );
141
+ File::WriteStringToFileOrDie (fonts_conf_template, fonts_conf_file);
142
+ }
134
143
#ifdef _WIN32
135
144
std::string env (" FONTCONFIG_PATH=" );
136
145
env.append (FLAGS_fontconfig_tmpdir.c_str ());
@@ -141,12 +150,18 @@ static void InitFontconfig() {
141
150
// Fix the locale so that the reported font names are consistent.
142
151
setenv (" LANG" , " en_US.utf8" , true );
143
152
#endif // _WIN32
144
- init_fontconfig = true ;
153
+ if (!fontconfig_initialized_ || force_clear) {
154
+ if (FcInitReinitialize () != FcTrue) {
155
+ tprintf (" FcInitiReinitialize failed!!\n " );
156
+ }
157
+ }
158
+ fontconfig_initialized_ = true ;
159
+ FontUtils::ReInit ();
145
160
}
146
161
147
162
static void ListFontFamilies (PangoFontFamily*** families,
148
163
int * n_families) {
149
- InitFontconfig ( );
164
+ PangoFontInfo::InitFontConfig ( false , FLAGS_fonts_dir );
150
165
PangoFontMap* font_map = pango_cairo_font_map_get_default ();
151
166
DISABLE_HEAP_LEAK_CHECK;
152
167
pango_font_map_list_families (font_map, families, n_families);
@@ -220,7 +235,7 @@ bool PangoFontInfo::ParseFontDescriptionName(const string& name) {
220
235
// in the font map. Note that if the font is wholly missing, this could
221
236
// correspond to a completely different font family and face.
222
237
PangoFont* PangoFontInfo::ToPangoFont () const {
223
- InitFontconfig ( );
238
+ InitFontConfig ( false , FLAGS_fonts_dir );
224
239
PangoFontMap* font_map = pango_cairo_font_map_get_default ();
225
240
PangoContext* context = pango_context_new ();
226
241
pango_cairo_context_set_resolution (context, resolution_);
@@ -253,6 +268,28 @@ bool PangoFontInfo::CoversUTF8Text(const char* utf8_text, int byte_length) const
253
268
return true ;
254
269
}
255
270
271
// A variant of strncpy that permits src and dest to overlap, provided that
// dest does not start after src: bytes are copied strictly front-to-back, so
// each source byte is read before it can be overwritten.
//
// Copies at most n bytes from src to dest, stopping early at the first nul
// byte in src. If src ends before n bytes have been written, the remainder of
// the n bytes in dest is filled with nul bytes (as strncpy does). If n is
// zero, nothing is read or written.
//
// Returns the original value of dest.
static char* my_strnmove(char* dest, const char* src, size_t n) {
  char* ret = dest;

  // Check both the remaining count and the source byte BEFORE copying, so
  // that n == 0 writes nothing and an empty src is never read past its
  // terminating nul.
  while (n > 0 && *src != '\0') {
    *dest = *src;
    ++dest;
    ++src;
    --n;
  }

  // If we reached a nul byte and there are more 'n' left, zero them out.
  while (n > 0) {
    *dest = '\0';
    ++dest;
    --n;
  }
  return ret;
}
292
+
256
293
int PangoFontInfo::DropUncoveredChars (string* utf8_text) const {
257
294
PangoFont* font = ToPangoFont ();
258
295
PangoCoverage* coverage = pango_font_get_coverage (font, NULL );
@@ -265,23 +302,30 @@ int PangoFontInfo::DropUncoveredChars(string* utf8_text) const {
265
302
UNICHAR::begin (utf8_text->c_str (), utf8_text->length ());
266
303
const UNICHAR::const_iterator it_end =
267
304
UNICHAR::end (utf8_text->c_str (), utf8_text->length ());
268
- for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it ) {
305
+ for (UNICHAR::const_iterator it = it_begin; it != it_end;) {
269
306
// Skip bad utf-8.
270
- if (!it.is_legal ())
271
- continue ; // One suitable error message will still be issued.
272
- if (!IsWhitespace (*it) && !pango_is_zero_width (*it) &&
273
- pango_coverage_get (coverage, *it) != PANGO_COVERAGE_EXACT) {
307
+ if (!it.is_legal ()) {
308
+ ++it; // One suitable error message will still be issued.
309
+ continue ;
310
+ }
311
+ int unicode = *it;
312
+ int utf8_len = it.utf8_len ();
313
+ const char * utf8_char = it.utf8_data ();
314
+ // Move it forward before the data gets modified.
315
+ ++it;
316
+ if (!IsWhitespace (unicode) && !pango_is_zero_width (unicode) &&
317
+ pango_coverage_get (coverage, unicode) != PANGO_COVERAGE_EXACT) {
274
318
if (TLOG_IS_ON (2 )) {
275
- char tmp[ 5 ] ;
276
- int len = it. get_utf8 (tmp );
277
- tmp[len] = ' \0 ' ;
278
- tlog ( 2 , " '%s' (U+%x) not covered by font \n " , tmp, *it) ;
319
+ UNICHAR unichar (unicode) ;
320
+ char * str = unichar. utf8_str ( );
321
+ tlog ( 2 , " '%s' (U+%x) not covered by font \n " , str, unicode) ;
322
+ delete[] str ;
279
323
}
280
324
++num_dropped_chars;
281
325
continue ;
282
326
}
283
- strncpy (out, it. utf8_data (), it. utf8_len () );
284
- out += it. utf8_len () ;
327
+ my_strnmove (out, utf8_char, utf8_len);
328
+ out += utf8_len;
285
329
}
286
330
utf8_text->resize (out - utf8_text->c_str ());
287
331
return num_dropped_chars;
@@ -438,6 +482,7 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len,
438
482
439
483
440
484
// ------------------------ FontUtils ------------------------------------
485
+ vector<string> FontUtils::available_fonts_; // cache list
441
486
442
487
// Returns whether the specified font description is available in the fonts
443
488
// directory.
@@ -449,7 +494,8 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len,
449
494
// from the font_map, and then check what we loaded to see if it has the
450
495
// description we expected. If it is not, then the font is deemed unavailable.
451
496
/* static */
452
- bool FontUtils::IsAvailableFont (const char * input_query_desc) {
497
+ bool FontUtils::IsAvailableFont (const char * input_query_desc,
498
+ string* best_match) {
453
499
string query_desc (input_query_desc);
454
500
if (PANGO_VERSION <= 12005 ) {
455
501
// Strip commas and any ' Medium' substring in the name.
@@ -466,7 +512,7 @@ bool FontUtils::IsAvailableFont(const char* input_query_desc) {
466
512
query_desc.c_str ());
467
513
PangoFont* selected_font = NULL ;
468
514
{
469
- InitFontconfig ( );
515
+ PangoFontInfo::InitFontConfig ( false , FLAGS_fonts_dir );
470
516
PangoFontMap* font_map = pango_cairo_font_map_get_default ();
471
517
PangoContext* context = pango_context_new ();
472
518
pango_context_set_font_map (context, font_map);
@@ -490,7 +536,16 @@ bool FontUtils::IsAvailableFont(const char* input_query_desc) {
490
536
char * selected_desc_str = pango_font_description_to_string (selected_desc);
491
537
tlog (2 , " query_desc: '%s' Selected: 's'\n " , query_desc.c_str (),
492
538
selected_desc_str);
493
-
539
+ if (!equal && best_match != NULL ) {
540
+ *best_match = selected_desc_str;
541
+ // Clip the ending ' 0' if there is one. It seems that, if there is no
542
+ // point size on the end of the fontname, then Pango always appends ' 0'.
543
+ int len = best_match->size ();
544
+ if (len > 2 && best_match->at (len - 1 ) == ' 0' &&
545
+ best_match->at (len - 2 ) == ' ' ) {
546
+ *best_match = best_match->substr (0 , len - 2 );
547
+ }
548
+ }
494
549
g_free (selected_desc_str);
495
550
pango_font_description_free (selected_desc);
496
551
g_object_unref (selected_font);
@@ -512,7 +567,6 @@ static bool ShouldIgnoreFontFamilyName(const char* query) {
512
567
// Outputs description names of available fonts.
513
568
/* static */
514
569
const vector<string>& FontUtils::ListAvailableFonts () {
515
- static vector<string> available_fonts_; // cache list
516
570
if (available_fonts_.size ()) {
517
571
return available_fonts_;
518
572
}
@@ -536,8 +590,9 @@ const vector<string>& FontUtils::ListAvailableFonts() {
536
590
for (int i = 0 ; i < n_families; ++i) {
537
591
const char * family_name = pango_font_family_get_name (families[i]);
538
592
tlog (2 , " Listing family %s\n " , family_name);
539
- if (ShouldIgnoreFontFamilyName (family_name))
593
+ if (ShouldIgnoreFontFamilyName (family_name)) {
540
594
continue ;
595
+ }
541
596
542
597
int n_faces;
543
598
PangoFontFace** faces = NULL ;
@@ -733,4 +788,8 @@ bool FontUtils::SelectFont(const char* utf8_word, const int utf8_len,
733
788
return false ;
734
789
}
735
790
791
+ // PangoFontInfo is reinitialized, so clear the static list of fonts.
792
+ /* static */
793
+ void FontUtils::ReInit () { available_fonts_.clear (); }
794
+
736
795
} // namespace tesseract
0 commit comments