26
26
27
27
namespace tesseract {
28
28
29
// NOTE: this is a diff rendering of the primary PageIterator constructor.
// Lines starting with '-' are the removed version, lines starting with '+'
// are the added version, unmarked lines are context shared by both, and the
// bare numbers are the old/new line numbers emitted by the diff view.
//
// The constructor copies page_res, tesseract, scale, scaled_yres and the
// rect_left/rect_top/rect_width/rect_height arguments into the matching
// members, and starts with no current word: word_ and cblob_it_ are NULL,
// word_length_ and blob_index_ are 0.
// The added version lists one initializer per line and additionally
// initializes include_upper_dots_ and include_lower_dots_ to false.
- PageIterator::PageIterator (PAGE_RES* page_res, Tesseract* tesseract,
30
- int scale, int scaled_yres,
31
- int rect_left, int rect_top,
29
+ PageIterator::PageIterator (PAGE_RES* page_res, Tesseract* tesseract, int scale,
30
+ int scaled_yres, int rect_left, int rect_top,
32
31
int rect_width, int rect_height)
33
// Removed initializer list (several members per line, no dot flags).
- : page_res_(page_res), tesseract_(tesseract),
34
- word_ (NULL ), word_length_(0 ), blob_index_(0 ), cblob_it_(NULL ),
35
- scale_(scale), scaled_yres_(scaled_yres),
36
- rect_left_(rect_left), rect_top_(rect_top),
37
- rect_width_(rect_width), rect_height_(rect_height) {
32
// Added initializer list (one member per line, dot flags default to false).
+ : page_res_(page_res),
33
+ tesseract_ (tesseract),
34
+ word_(NULL ),
35
+ word_length_(0 ),
36
+ blob_index_(0 ),
37
+ cblob_it_(NULL ),
38
+ include_upper_dots_(false ),
39
+ include_lower_dots_(false ),
40
+ scale_(scale),
41
+ scaled_yres_(scaled_yres),
42
+ rect_left_(rect_left),
43
+ rect_top_(rect_top),
44
+ rect_width_(rect_width),
45
+ rect_height_(rect_height) {
38
46
// Allocates the underlying PAGE_RES_IT over page_res with a raw new; where
// it_ is released is not visible in this excerpt.
it_ = new PAGE_RES_IT (page_res);
39
47
// The call is written with an explicit PageIterator:: qualifier, so if
// Begin() is virtual this always runs the PageIterator version.
PageIterator::Begin ();
40
48
}
@@ -50,19 +58,29 @@ PageIterator::~PageIterator() {
50
58
* objects at a higher level.
51
59
*/
52
60
// NOTE: diff rendering of the copy constructor. '-' lines are the removed
// version, '+' lines the added version, bare numbers are old/new line
// numbers from the diff view.
//
// Copies page_res_, tesseract_, word_length_, blob_index_, scale_,
// scaled_yres_ and the rect_* members from src. word_ and cblob_it_ are NOT
// copied; both are set to NULL here. The added version also copies
// include_upper_dots_ and include_lower_dots_ from src.
PageIterator::PageIterator (const PageIterator& src)
53
// Removed initializer list (no dot flags).
- : page_res_(src.page_res_), tesseract_(src.tesseract_),
54
- word_ (NULL ), word_length_(src.word_length_),
55
- blob_index_(src.blob_index_), cblob_it_(NULL ),
56
- scale_(src.scale_), scaled_yres_(src.scaled_yres_),
57
- rect_left_(src.rect_left_), rect_top_(src.rect_top_),
58
- rect_width_(src.rect_width_), rect_height_(src.rect_height_) {
61
// Added initializer list (one member per line, dot flags copied from src).
+ : page_res_(src.page_res_),
62
+ tesseract_ (src.tesseract_),
63
+ word_(NULL ),
64
+ word_length_(src.word_length_),
65
+ blob_index_(src.blob_index_),
66
+ cblob_it_(NULL ),
67
+ include_upper_dots_(src.include_upper_dots_),
68
+ include_lower_dots_(src.include_lower_dots_),
69
+ scale_(src.scale_),
70
+ scaled_yres_(src.scaled_yres_),
71
+ rect_left_(src.rect_left_),
72
+ rect_top_(src.rect_top_),
73
+ rect_width_(src.rect_width_),
74
+ rect_height_(src.rect_height_) {
59
75
// The copy gets its own PAGE_RES_IT, copy-constructed from src's iterator,
// rather than sharing src.it_.
it_ = new PAGE_RES_IT (*src.it_ );
60
76
// Called with the source's blob index; presumably this rebuilds the
// word-level state (word_, cblob_it_) left NULL above -- BeginWord is not
// visible in this excerpt, so confirm against its definition.
BeginWord (src.blob_index_ );
61
77
}
62
78
63
79
const PageIterator& PageIterator::operator =(const PageIterator& src) {
64
80
page_res_ = src.page_res_ ;
65
81
tesseract_ = src.tesseract_ ;
82
+ include_upper_dots_ = src.include_upper_dots_ ;
83
+ include_lower_dots_ = src.include_lower_dots_ ;
66
84
scale_ = src.scale_ ;
67
85
scaled_yres_ = src.scaled_yres_ ;
68
86
rect_left_ = src.rect_left_ ;
@@ -252,16 +270,19 @@ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
252
270
PARA *para = NULL ;
253
271
switch (level) {
254
272
case RIL_BLOCK:
255
- box = it_->block ()->block ->bounding_box ();
273
+ box = it_->block ()->block ->restricted_bounding_box (include_upper_dots_,
274
+ include_lower_dots_);
256
275
break ;
257
276
case RIL_PARA:
258
277
para = it_->row ()->row ->para ();
259
278
// explicit fall-through.
260
279
case RIL_TEXTLINE:
261
- box = it_->row ()->row ->bounding_box ();
280
+ box = it_->row ()->row ->restricted_bounding_box (include_upper_dots_,
281
+ include_lower_dots_);
262
282
break ;
263
283
case RIL_WORD:
264
- box = it_->word ()->word ->bounding_box ();
284
+ box = it_->word ()->word ->restricted_bounding_box (include_upper_dots_,
285
+ include_lower_dots_);
265
286
break ;
266
287
case RIL_SYMBOL:
267
288
if (cblob_it_ == NULL )
@@ -387,39 +408,23 @@ Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const {
387
408
int left, top, right, bottom;
388
409
if (!BoundingBoxInternal (level, &left, &top, &right, &bottom))
389
410
return NULL ;
390
- Pix* pix = NULL ;
391
- switch (level) {
392
- case RIL_BLOCK:
393
- case RIL_PARA:
394
- int bleft, btop, bright, bbottom;
395
- BoundingBoxInternal (RIL_BLOCK, &bleft, &btop, &bright, &bbottom);
396
- pix = it_->block ()->block ->render_mask ();
397
- // AND the mask and the image.
398
- pixRasterop (pix, 0 , 0 , pixGetWidth (pix), pixGetHeight (pix),
399
- PIX_SRC & PIX_DST, tesseract_->pix_binary (),
400
- bleft, btop);
401
- if (level == RIL_PARA) {
402
- // RIL_PARA needs further attention:
403
- // clip the paragraph from the block mask.
404
- Box* box = boxCreate (left - bleft, top - btop,
405
- right - left, bottom - top);
406
- Pix* pix2 = pixClipRectangle (pix, box, NULL );
407
- boxDestroy (&box);
408
- pixDestroy (&pix);
409
- pix = pix2;
410
- }
411
- break ;
412
- case RIL_TEXTLINE:
413
- case RIL_WORD:
414
- case RIL_SYMBOL:
415
- if (level == RIL_SYMBOL && cblob_it_ != NULL &&
416
- cblob_it_->data ()->area () != 0 )
417
- return cblob_it_->data ()->render ();
418
- // Just clip from the bounding box.
419
- Box* box = boxCreate (left, top, right - left, bottom - top);
420
- pix = pixClipRectangle (tesseract_->pix_binary (), box, NULL );
421
- boxDestroy (&box);
422
- break ;
411
+ if (level == RIL_SYMBOL && cblob_it_ != NULL &&
412
+ cblob_it_->data ()->area () != 0 )
413
+ return cblob_it_->data ()->render ();
414
+ Box* box = boxCreate (left, top, right - left, bottom - top);
415
+ Pix* pix = pixClipRectangle (tesseract_->pix_binary (), box, NULL );
416
+ boxDestroy (&box);
417
+ if (level == RIL_BLOCK || level == RIL_PARA) {
418
+ // Clip to the block polygon as well.
419
+ TBOX mask_box;
420
+ Pix* mask = it_->block ()->block ->render_mask (&mask_box);
421
+ int mask_x = left - mask_box.left ();
422
+ int mask_y = top - (tesseract_->ImageHeight () - mask_box.top ());
423
+ // AND the mask and pix, putting the result in pix.
424
+ pixRasterop (pix, MAX (0 , -mask_x), MAX (0 , -mask_y), pixGetWidth (pix),
425
+ pixGetHeight (pix), PIX_SRC & PIX_DST, mask, MAX (0 , mask_x),
426
+ MAX (0 , mask_y));
427
+ pixDestroy (&mask);
423
428
}
424
429
return pix;
425
430
}
@@ -452,17 +457,24 @@ Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
452
457
Box* box = boxCreate (*left, *top, right - *left, bottom - *top);
453
458
Pix* grey_pix = pixClipRectangle (original_img, box, NULL );
454
459
boxDestroy (&box);
455
- if (level == RIL_BLOCK) {
456
- Pix* mask = it_->block ()->block ->render_mask ();
457
- Pix* expanded_mask = pixCreate (right - *left, bottom - *top, 1 );
458
- pixRasterop (expanded_mask, padding, padding,
459
- pixGetWidth (mask), pixGetHeight (mask),
460
- PIX_SRC, mask, 0 , 0 );
460
+ if (level == RIL_BLOCK || level == RIL_PARA) {
461
+ // Clip to the block polygon as well.
462
+ TBOX mask_box;
463
+ Pix* mask = it_->block ()->block ->render_mask (&mask_box);
464
+ // Copy the mask registered correctly into an image the size of grey_pix.
465
+ int mask_x = *left - mask_box.left ();
466
+ int mask_y = *top - (pixGetHeight (original_img) - mask_box.top ());
467
+ int width = pixGetWidth (grey_pix);
468
+ int height = pixGetHeight (grey_pix);
469
+ Pix* resized_mask = pixCreate (width, height, 1 );
470
+ pixRasterop (resized_mask, MAX (0 , -mask_x), MAX (0 , -mask_y), width, height,
471
+ PIX_SRC, mask, MAX (0 , mask_x), MAX (0 , mask_y));
461
472
pixDestroy (&mask);
462
- pixDilateBrick (expanded_mask, expanded_mask, 2 *padding + 1 , 2 *padding + 1 );
463
- pixInvert (expanded_mask, expanded_mask);
464
- pixSetMasked (grey_pix, expanded_mask, MAX_UINT32);
465
- pixDestroy (&expanded_mask);
473
+ pixDilateBrick (resized_mask, resized_mask, 2 * padding + 1 ,
474
+ 2 * padding + 1 );
475
+ pixInvert (resized_mask, resized_mask);
476
+ pixSetMasked (grey_pix, resized_mask, MAX_UINT32);
477
+ pixDestroy (&resized_mask);
466
478
}
467
479
return grey_pix;
468
480
}
0 commit comments