2
2
// File: cjkpitch.cpp
3
3
// Description: Code to determine fixed pitchness and the pitch if fixed,
4
4
// for CJK text.
5
- // Copyright 2011 Google Inc. All Rights Reserved.
6
- // Author: takenaka@google.com (Hiroshi Takenaka)
5
+ // Author: takenaka@google.com (Hiroshi Takenaka)
7
6
// Created: Mon Jun 27 12:48:35 JST 2011
8
7
//
8
+ // Copyright 2011 Google Inc. All Rights Reserved.
9
9
// Licensed under the Apache License, Version 2.0 (the "License");
10
10
// you may not use this file except in compliance with the License.
11
11
// You may obtain a copy of the License at
17
17
// limitations under the License.
18
18
//
19
19
// /////////////////////////////////////////////////////////////////////
20
+
20
21
#include " cjkpitch.h"
21
22
#include " genericvector.h"
22
23
#include " topitch.h"
23
24
#include " tovars.h"
24
25
25
26
#include < algorithm>
27
+ #include < vector> // for std::vector
26
28
27
29
BOOL_VAR (textord_space_size_is_variable, FALSE ,
28
30
" If true, word delimiter spaces are assumed to have "
@@ -343,7 +345,7 @@ class FPRow {
343
345
return gap_;
344
346
}
345
347
346
- int num_chars () {
348
+ size_t num_chars () {
347
349
return characters_.size ();
348
350
}
349
351
FPChar *character (int i) {
@@ -556,7 +558,7 @@ void FPRow::OutputEstimations() {
556
558
557
559
// Make max_nonspace larger than any intra-character gap so that
558
560
// make_prop_words() won't break a row at the middle of a character.
559
- for (int i = 0 ; i < num_chars (); ++i) {
561
+ for (size_t i = 0 ; i < num_chars (); ++i) {
560
562
if (characters_[i].max_gap () > real_row_->max_nonspace ) {
561
563
real_row_->max_nonspace = characters_[i].max_gap ();
562
564
}
@@ -572,7 +574,7 @@ void FPRow::OutputEstimations() {
572
574
cell_it.add_after_then_move (cell);
573
575
574
576
int right = real_body (0 ).right ();
575
- for (int i = 1 ; i < num_chars (); ++i) {
577
+ for (size_t i = 1 ; i < num_chars (); ++i) {
576
578
// Put a word break if gap between two characters is bigger than
577
579
// space_threshold. Don't break if neither of the two characters
578
580
// could be "finalized", because maybe they need to be merged
@@ -614,7 +616,7 @@ void FPRow::EstimatePitch(bool pass1) {
614
616
cx0 = center_x (0 );
615
617
616
618
heights_.Add (box (0 ).height ());
617
- for (int i = 1 ; i < num_chars (); i++) {
619
+ for (size_t i = 1 ; i < num_chars (); i++) {
618
620
cx1 = center_x (i);
619
621
int32_t pitch = cx1 - cx0;
620
622
int32_t gap = std::max (0 , real_body (i - 1 ).x_gap (real_body (i)));
@@ -680,7 +682,7 @@ void FPRow::DebugOutputResult(int row_index) {
680
682
real_row_->space_size , real_row_->space_threshold ,
681
683
real_row_->xheight );
682
684
683
- for (int i = 0 ; i < num_chars (); i++) {
685
+ for (size_t i = 0 ; i < num_chars (); i++) {
684
686
tprintf (" Char %d: is_final=%d is_good=%d num_blobs=%d: " ,
685
687
i, is_final (i), is_good (i), character (i)->num_blobs ());
686
688
box (i).print ();
@@ -692,14 +694,14 @@ void FPRow::Pass1Analyze() {
692
694
if (num_chars () < 2 ) return ;
693
695
694
696
if (estimated_pitch_ > 0 .0f ) {
695
- for (int i = 2 ; i < num_chars (); i++) {
697
+ for (size_t i = 2 ; i < num_chars (); i++) {
696
698
if (is_good_pitch (estimated_pitch_, box (i - 2 ), box (i-1 )) &&
697
699
is_good_pitch (estimated_pitch_, box (i - 1 ), box (i))) {
698
700
mark_good (i - 1 );
699
701
}
700
702
}
701
703
} else {
702
- for (int i = 2 ; i < num_chars (); i++) {
704
+ for (size_t i = 2 ; i < num_chars (); i++) {
703
705
if (is_good_pitch (box_pitch (box (i-2 ), box (i-1 )), box (i - 1 ), box (i))) {
704
706
mark_good (i - 1 );
705
707
}
@@ -715,7 +717,7 @@ bool FPRow::Pass2Analyze() {
715
717
if (num_chars () <= 1 || estimated_pitch_ == 0 .0f ) {
716
718
return false ;
717
719
}
718
- for (int i = 0 ; i < num_chars (); i++) {
720
+ for (size_t i = 0 ; i < num_chars (); i++) {
719
721
if (is_final (i)) continue ;
720
722
721
723
FPChar::Alignment alignment = character (i)->alignment ();
@@ -786,7 +788,7 @@ bool FPRow::Pass2Analyze() {
786
788
}
787
789
TBOX ibody (c1 - estimated_pitch_, box (i).bottom (), c1, box (i).top ());
788
790
789
- int j = i;
791
+ size_t j = i;
790
792
TBOX merged;
791
793
while (j < num_chars () && !is_final (j) && mostly_overlap (ibody, box (j)) &&
792
794
merged.bounding_union (box (j)).height () <
@@ -809,7 +811,7 @@ bool FPRow::Pass2Analyze() {
809
811
character (i)->set_merge_to_prev (false );
810
812
finalize (i);
811
813
} else {
812
- for (int k = i + 1 ; k < j; k++) {
814
+ for (size_t k = i + 1 ; k < j; k++) {
813
815
character (k)->set_merge_to_prev (true );
814
816
}
815
817
}
@@ -832,7 +834,7 @@ bool FPRow::Pass2Analyze() {
832
834
void FPRow::MergeFragments () {
833
835
int last_char = 0 ;
834
836
835
- for (int j = 0 ; j < num_chars (); ++j) {
837
+ for (size_t j = 0 ; j < num_chars (); ++j) {
836
838
if (character (j)->merge_to_prev ()) {
837
839
character (last_char)->Merge (*character (j));
838
840
character (j)->set_delete_flag (true );
@@ -847,7 +849,7 @@ void FPRow::MergeFragments() {
847
849
848
850
void FPRow::FinalizeLargeChars () {
849
851
float row_pitch = estimated_pitch ();
850
- for (int i = 0 ; i < num_chars (); i++) {
852
+ for (size_t i = 0 ; i < num_chars (); i++) {
851
853
if (is_final (i)) continue ;
852
854
853
855
// Finalize if both neighbors are finalized. We have no other choice.
@@ -888,7 +890,7 @@ void FPRow::FinalizeLargeChars() {
888
890
// character L on its left and a not-finalized character R on its
889
891
// right, we mark C as good if the pitch between C and L is good,
890
892
// regardless of the pitch between C and R.
891
- for (int i = 0 ; i < num_chars (); i++) {
893
+ for (size_t i = 0 ; i < num_chars (); i++) {
892
894
if (!is_final (i)) continue ;
893
895
bool good_pitch = false ;
894
896
bool bad_pitch = false ;
@@ -919,7 +921,7 @@ class FPAnalyzer {
919
921
void Init (ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
920
922
921
923
void Pass1Analyze () {
922
- for (int i = 0 ; i < rows_.size (); i++) rows_[i].Pass1Analyze ();
924
+ for (size_t i = 0 ; i < rows_.size (); i++) rows_[i].Pass1Analyze ();
923
925
}
924
926
925
927
// Estimate character pitch for each row. The argument pass1 can be
@@ -934,16 +936,16 @@ class FPAnalyzer {
934
936
}
935
937
936
938
void MergeFragments () {
937
- for (int i = 0 ; i < rows_.size (); i++) rows_[i].MergeFragments ();
939
+ for (size_t i = 0 ; i < rows_.size (); i++) rows_[i].MergeFragments ();
938
940
}
939
941
940
942
void FinalizeLargeChars () {
941
- for (int i = 0 ; i < rows_.size (); i++) rows_[i].FinalizeLargeChars ();
943
+ for (size_t i = 0 ; i < rows_.size (); i++) rows_[i].FinalizeLargeChars ();
942
944
}
943
945
944
946
bool Pass2Analyze () {
945
947
bool changed = false ;
946
- for (int i = 0 ; i < rows_.size (); i++) {
948
+ for (size_t i = 0 ; i < rows_.size (); i++) {
947
949
if (rows_[i].Pass2Analyze ()) {
948
950
changed = true ;
949
951
}
@@ -952,33 +954,34 @@ class FPAnalyzer {
952
954
}
953
955
954
956
void OutputEstimations () {
955
- for (int i = 0 ; i < rows_.size (); i++) rows_[i].OutputEstimations ();
957
+ for (size_t i = 0 ; i < rows_.size (); i++) rows_[i].OutputEstimations ();
956
958
// Don't we need page-level estimation of gaps/spaces?
957
959
}
958
960
959
961
void DebugOutputResult () {
960
962
tprintf (" FPAnalyzer: final result\n " );
961
- for (int i = 0 ; i < rows_.size (); i++) rows_[i].DebugOutputResult (i);
963
+ for (size_t i = 0 ; i < rows_.size (); i++) rows_[i].DebugOutputResult (i);
962
964
}
963
965
964
- int num_rows () {
966
+ size_t num_rows () {
965
967
return rows_.size ();
966
968
}
967
969
968
970
// Returns the upper limit for pass2 loop iteration.
969
- int max_iteration () {
971
+ unsigned max_iteration () {
970
972
// We're fixing at least one character per iteration. So basically
971
973
// we shouldn't require more than max_chars_per_row_ iterations.
972
974
return max_chars_per_row_ + 100 ;
973
975
}
974
976
975
977
private:
976
978
ICOORD page_tr_;
977
- GenericVector<FPRow> rows_;
978
- int num_tall_rows_;
979
- int num_bad_rows_;
980
- int num_empty_rows_;
981
- int max_chars_per_row_;
979
+ std::vector<FPRow> rows_;
980
+ unsigned num_tall_rows_;
981
+ unsigned num_bad_rows_;
982
+ // TODO: num_empty_rows_ is incremented, but never used otherwise.
983
+ unsigned num_empty_rows_;
984
+ unsigned max_chars_per_row_;
982
985
};
983
986
984
987
void FPAnalyzer::Init (ICOORD page_tr, TO_BLOCK_LIST *port_blocks) {
@@ -1005,7 +1008,7 @@ void FPAnalyzer::Init(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) {
1005
1008
FPRow row;
1006
1009
row.Init (row_it.data ());
1007
1010
rows_.push_back (row);
1008
- int num_chars = rows_.back ().num_chars ();
1011
+ size_t num_chars = rows_.back ().num_chars ();
1009
1012
if (num_chars <= 1 ) num_empty_rows_++;
1010
1013
if (num_chars > max_chars_per_row_) max_chars_per_row_ = num_chars;
1011
1014
}
@@ -1018,7 +1021,7 @@ void FPAnalyzer::EstimatePitch(bool pass1) {
1018
1021
num_tall_rows_ = 0 ;
1019
1022
num_bad_rows_ = 0 ;
1020
1023
pitch_height_stats.Clear ();
1021
- for (int i = 0 ; i < rows_.size (); i++) {
1024
+ for (size_t i = 0 ; i < rows_.size (); i++) {
1022
1025
rows_[i].EstimatePitch (pass1);
1023
1026
if (rows_[i].good_pitches ()) {
1024
1027
pitch_height_stats.Add (rows_[i].height () + rows_[i].gap (),
@@ -1030,7 +1033,7 @@ void FPAnalyzer::EstimatePitch(bool pass1) {
1030
1033
}
1031
1034
1032
1035
pitch_height_stats.Finish ();
1033
- for (int i = 0 ; i < rows_.size (); i++) {
1036
+ for (size_t i = 0 ; i < rows_.size (); i++) {
1034
1037
if (rows_[i].good_pitches () >= 5 ) {
1035
1038
// We have enough evidences. Just use the pitch estimation
1036
1039
// from this row.
@@ -1077,7 +1080,7 @@ void compute_fixed_pitch_cjk(ICOORD page_tr,
1077
1080
return ;
1078
1081
}
1079
1082
1080
- int iteration = 0 ;
1083
+ size_t iteration = 0 ;
1081
1084
do {
1082
1085
analyzer.MergeFragments ();
1083
1086
analyzer.FinalizeLargeChars ();
0 commit comments