20
20
#include " config_auto.h"
21
21
#endif
22
22
23
+ #include < locale> // for std::locale::classic
23
24
#include < memory> // std::unique_ptr
25
+ #include < sstream> // for std::stringstream
24
26
#include " allheaders.h"
25
27
#include " baseapi.h"
26
28
#include < cmath>
@@ -324,7 +326,6 @@ static bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) {
324
326
325
327
char * TessPDFRenderer::GetPDFTextObjects (TessBaseAPI* api,
326
328
double width, double height) {
327
- STRING pdf_str (" " );
328
329
double ppi = api->GetSourceYResolution ();
329
330
330
331
// These initial conditions are all arbitrary and will be overwritten
@@ -339,18 +340,20 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
339
340
double c = 0 ;
340
341
double d = 1 ;
341
342
343
+ std::stringstream pdf_str;
344
+ // Use the classic "C" locale so that double values (see prec()) are formatted independently of the global locale.
345
+ pdf_str.imbue (std::locale::classic ());
346
+ // Use at most 8 significant digits for double values.
347
+ pdf_str.precision (8 );
348
+
342
349
// TODO(jbreiden) This marries the text and image together.
343
350
// Slightly cleaner from an abstraction standpoint if this were to
344
351
// live inside a separate text object.
345
- pdf_str += " q " ;
346
- pdf_str.add_str_double (" " , prec (width));
347
- pdf_str += " 0 0 " ;
348
- pdf_str.add_str_double (" " , prec (height));
349
- pdf_str += " 0 0 cm" ;
352
+ pdf_str << " q " << prec (width) << " 0 0 " << prec (height) << " 0 0 cm" ;
350
353
if (!textonly_) {
351
- pdf_str += " /Im1 Do" ;
354
+ pdf_str << " /Im1 Do" ;
352
355
}
353
- pdf_str += " Q\n " ;
356
+ pdf_str << " Q\n " ;
354
357
355
358
int line_x1 = 0 ;
356
359
int line_y1 = 0 ;
@@ -360,7 +363,7 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
360
363
ResultIterator *res_it = api->GetIterator ();
361
364
while (!res_it->Empty (RIL_BLOCK)) {
362
365
if (res_it->IsAtBeginningOf (RIL_BLOCK)) {
363
- pdf_str += " BT\n 3 Tr" ; // Begin text object, use invisible ink
366
+ pdf_str << " BT\n 3 Tr" ; // Begin text object, use invisible ink
364
367
old_fontsize = 0 ; // Every block will declare its fontsize
365
368
new_block = true ; // Every block will declare its affine matrix
366
369
}
@@ -412,20 +415,20 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
412
415
if (writing_direction != old_writing_direction || new_block) {
413
416
AffineMatrix (writing_direction,
414
417
line_x1, line_y1, line_x2, line_y2, &a, &b, &c, &d);
415
- pdf_str. add_str_double ( " " , prec (a)); // . This affine matrix
416
- pdf_str. add_str_double ( " " , prec (b)); // . sets the coordinate
417
- pdf_str. add_str_double ( " " , prec (c)); // . system for all
418
- pdf_str. add_str_double ( " " , prec (d)); // . text that follows.
419
- pdf_str. add_str_double ( " " , prec (x)); // .
420
- pdf_str. add_str_double ( " " , prec (y)); // .
421
- pdf_str += ( " Tm " ); // Place cursor absolutely
418
+ pdf_str << " " << prec (a) // . This affine matrix
419
+ << " " << prec (b) // . sets the coordinate
420
+ << " " << prec (c) // . system for all
421
+ << " " << prec (d) // . text that follows.
422
+ << " " << prec (x) // .
423
+ << " " << prec (y) // .
424
+ << ( " Tm " ); // Place cursor absolutely
422
425
new_block = false ;
423
426
} else {
424
427
double dx = x - old_x;
425
428
double dy = y - old_y;
426
- pdf_str. add_str_double ( " " , prec (dx * a + dy * b));
427
- pdf_str. add_str_double ( " " , prec (dx * c + dy * d));
428
- pdf_str += ( " Td " ); // Relative moveto
429
+ pdf_str << " " << prec (dx * a + dy * b)
430
+ << " " << prec (dx * c + dy * d)
431
+ << ( " Td " ); // Relative moveto
429
432
}
430
433
old_x = x;
431
434
old_y = y;
@@ -444,16 +447,14 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
444
447
if (fontsize <= 0 )
445
448
fontsize = kDefaultFontsize ;
446
449
if (fontsize != old_fontsize) {
447
- char textfont[20 ];
448
- snprintf (textfont, sizeof (textfont), " /f-0-0 %d Tf " , fontsize);
449
- pdf_str += textfont;
450
+ pdf_str << " /f-0-0 " << fontsize << " Tf " ;
450
451
old_fontsize = fontsize;
451
452
}
452
453
}
453
454
454
455
bool last_word_in_line = res_it->IsAtFinalElement (RIL_TEXTLINE, RIL_WORD);
455
456
bool last_word_in_block = res_it->IsAtFinalElement (RIL_BLOCK, RIL_WORD);
456
- STRING pdf_word ( " " ) ;
457
+ std::string pdf_word;
457
458
int pdf_word_len = 0 ;
458
459
do {
459
460
const std::unique_ptr<const char []> grapheme (
@@ -473,23 +474,22 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
473
474
if (word_length > 0 && pdf_word_len > 0 ) {
474
475
double h_stretch =
475
476
kCharWidth * prec (100.0 * word_length / (fontsize * pdf_word_len));
476
- pdf_str.add_str_double (" " , h_stretch);
477
- pdf_str += " Tz" ; // horizontal stretch
478
- pdf_str += " [ <" ;
479
- pdf_str += pdf_word; // UTF-16BE representation
480
- pdf_str += " > ] TJ" ; // show the text
477
+ pdf_str << h_stretch << " Tz" // horizontal stretch
478
+ << " [ <" << pdf_word // UTF-16BE representation
479
+ << " > ] TJ" ; // show the text
481
480
}
482
481
if (last_word_in_line) {
483
- pdf_str += " \n " ;
482
+ pdf_str << " \n " ;
484
483
}
485
484
if (last_word_in_block) {
486
- pdf_str += " ET\n " ; // end the text object
485
+ pdf_str << " ET\n " ; // end the text object
487
486
}
488
487
}
489
- char *ret = new char [pdf_str.length () + 1 ];
490
- strcpy (ret, pdf_str.string ());
488
+ const std::string& text = pdf_str.str ();
489
+ char * result = new char [text.length () + 1 ];
490
+ strcpy (result, text.c_str ());
491
491
delete res_it;
492
- return ret ;
492
+ return result ;
493
493
}
494
494
495
495
bool TessPDFRenderer::BeginDocumentHandler () {
0 commit comments