|
33 | 33 |
|
34 | 34 | namespace tesseract {
|
35 | 35 | Classify::Classify()
|
36 |
| - : BOOL_MEMBER(prioritize_division, FALSE, |
37 |
| - "Prioritize blob division over chopping", this->params()), |
38 |
| - INT_MEMBER(tessedit_single_match, FALSE, |
39 |
| - "Top choice only from CP", this->params()), |
40 |
| - BOOL_MEMBER(classify_enable_learning, true, |
41 |
| - "Enable adaptive classifier", this->params()), |
42 |
| - INT_MEMBER(classify_debug_level, 0, "Classify debug level", |
43 |
| - this->params()), |
44 |
| - INT_MEMBER(classify_norm_method, character, "Normalization Method ...", |
45 |
| - this->params()), |
46 |
| - double_MEMBER(classify_char_norm_range, 0.2, |
47 |
| - "Character Normalization Range ...", this->params()), |
48 |
| - double_MEMBER(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...", |
49 |
| - this->params()), /* PREV DEFAULT 0.1 */ |
50 |
| - double_MEMBER(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ...", |
51 |
| - this->params()), /* PREV DEFAULT 0.3 */ |
52 |
| - double_MEMBER(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...", |
53 |
| - this->params()), /* PREV DEFAULT 0.1 */ |
54 |
| - double_MEMBER(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...", |
55 |
| - this->params()), /* PREV DEFAULT 0.3 */ |
56 |
| - double_MEMBER(classify_max_rating_ratio, 1.5, |
57 |
| - "Veto ratio between classifier ratings", this->params()), |
58 |
| - double_MEMBER(classify_max_certainty_margin, 5.5, |
59 |
| - "Veto difference between classifier certainties", |
| 36 | + : BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping", |
60 | 37 | this->params()),
|
61 |
| - BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching", |
62 |
| - this->params()), |
63 |
| - BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching", |
64 |
| - this->params()), |
65 |
| - BOOL_MEMBER(classify_enable_adaptive_matcher, 1, |
66 |
| - "Enable adaptive classifier", |
67 |
| - this->params()), |
68 |
| - BOOL_MEMBER(classify_use_pre_adapted_templates, 0, |
69 |
| - "Use pre-adapted classifier templates", this->params()), |
70 |
| - BOOL_MEMBER(classify_save_adapted_templates, 0, |
71 |
| - "Save adapted templates to a file", this->params()), |
72 |
| - BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger", |
73 |
| - this->params()), |
74 |
| - BOOL_MEMBER(classify_nonlinear_norm, 0, |
75 |
| - "Non-linear stroke-density normalization", this->params()), |
76 |
| - INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()), |
77 |
| - INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()), |
78 |
| - INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ", |
79 |
| - this->params()), |
80 |
| - double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)", |
| 38 | + BOOL_MEMBER(prioritize_division, FALSE, |
| 39 | + "Prioritize blob division over chopping", this->params()), |
| 40 | + INT_MEMBER(tessedit_single_match, FALSE, "Top choice only from CP", |
| 41 | + this->params()), |
| 42 | + BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier", |
81 | 43 | this->params()),
|
82 |
| - double_MEMBER(matcher_great_threshold, 0.0, "Great Match (0-1)", |
| 44 | + INT_MEMBER(classify_debug_level, 0, "Classify debug level", |
| 45 | + this->params()), |
| 46 | + INT_MEMBER(classify_norm_method, character, "Normalization Method ...", |
| 47 | + this->params()), |
| 48 | + double_MEMBER(classify_char_norm_range, 0.2, |
| 49 | + "Character Normalization Range ...", this->params()), |
| 50 | + double_MEMBER(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...", |
| 51 | + this->params()), /* PREV DEFAULT 0.1 */ |
| 52 | + double_MEMBER(classify_max_norm_scale_x, 0.325, |
| 53 | + "Max char x-norm scale ...", |
| 54 | + this->params()), /* PREV DEFAULT 0.3 */ |
| 55 | + double_MEMBER(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...", |
| 56 | + this->params()), /* PREV DEFAULT 0.1 */ |
| 57 | + double_MEMBER(classify_max_norm_scale_y, 0.325, |
| 58 | + "Max char y-norm scale ...", |
| 59 | + this->params()), /* PREV DEFAULT 0.3 */ |
| 60 | + double_MEMBER(classify_max_rating_ratio, 1.5, |
| 61 | + "Veto ratio between classifier ratings", this->params()), |
| 62 | + double_MEMBER(classify_max_certainty_margin, 5.5, |
| 63 | + "Veto difference between classifier certainties", |
| 64 | + this->params()), |
| 65 | + BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching", |
83 | 66 | this->params()),
|
84 |
| - double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)", |
| 67 | + BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching", |
85 | 68 | this->params()),
|
86 |
| - double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)", |
| 69 | + BOOL_MEMBER(classify_enable_adaptive_matcher, 1, |
| 70 | + "Enable adaptive classifier", this->params()), |
| 71 | + BOOL_MEMBER(classify_use_pre_adapted_templates, 0, |
| 72 | + "Use pre-adapted classifier templates", this->params()), |
| 73 | + BOOL_MEMBER(classify_save_adapted_templates, 0, |
| 74 | + "Save adapted templates to a file", this->params()), |
| 75 | + BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger", |
87 | 76 | this->params()),
|
88 |
| - double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)", |
| 77 | + BOOL_MEMBER(classify_nonlinear_norm, 0, |
| 78 | + "Non-linear stroke-density normalization", this->params()), |
| 79 | + INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()), |
| 80 | + INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()), |
| 81 | + INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ", |
| 82 | + this->params()), |
| 83 | + double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)", |
| 84 | + this->params()), |
| 85 | + double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)", |
| 86 | + this->params()), |
| 87 | + double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)", |
| 88 | + this->params()), |
| 89 | + double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)", |
| 90 | + this->params()), |
| 91 | + double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)", |
| 92 | + this->params()), |
| 93 | + double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length", |
| 94 | + this->params()), |
| 95 | + INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes", |
| 96 | + this->params()), |
| 97 | + INT_MEMBER(matcher_min_examples_for_prototyping, 3, |
| 98 | + "Reliable Config Threshold", this->params()), |
| 99 | + INT_MEMBER(matcher_sufficient_examples_for_prototyping, 5, |
| 100 | + "Enable adaption even if the ambiguities have not been seen", |
| 101 | + this->params()), |
| 102 | + double_MEMBER(matcher_clustering_max_angle_delta, 0.015, |
| 103 | + "Maximum angle delta for prototype clustering", |
| 104 | + this->params()), |
| 105 | + double_MEMBER(classify_misfit_junk_penalty, 0.0, |
| 106 | + "Penalty to apply when a non-alnum is vertically out of " |
| 107 | + "its expected textline position", |
| 108 | + this->params()), |
| 109 | + double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()), |
| 110 | + double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", |
| 111 | + this->params()), |
| 112 | + double_MEMBER(tessedit_class_miss_scale, 0.00390625, |
| 113 | + "Scale factor for features not used", this->params()), |
| 114 | + double_MEMBER( |
| 115 | + classify_adapted_pruning_factor, 2.5, |
| 116 | + "Prune poor adapted results this much worse than best result", |
| 117 | + this->params()), |
| 118 | + double_MEMBER(classify_adapted_pruning_threshold, -1.0, |
| 119 | + "Threshold at which classify_adapted_pruning_factor starts", |
| 120 | + this->params()), |
| 121 | + INT_MEMBER(classify_adapt_proto_threshold, 230, |
| 122 | + "Threshold for good protos during adaptive 0-255", |
| 123 | + this->params()), |
| 124 | + INT_MEMBER(classify_adapt_feature_threshold, 230, |
| 125 | + "Threshold for good features during adaptive 0-255", |
| 126 | + this->params()), |
| 127 | + BOOL_MEMBER(disable_character_fragments, TRUE, |
| 128 | + "Do not include character fragments in the" |
| 129 | + " results of the classifier", |
89 | 130 | this->params()),
|
90 |
| - double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length", |
| 131 | + double_MEMBER(classify_character_fragments_garbage_certainty_threshold, |
| 132 | + -3.0, |
| 133 | + "Exclude fragments that do not look like whole" |
| 134 | + " characters from training and adaption", |
| 135 | + this->params()), |
| 136 | + BOOL_MEMBER(classify_debug_character_fragments, FALSE, |
| 137 | + "Bring up graphical debugging windows for fragments training", |
91 | 138 | this->params()),
|
92 |
| - INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes", |
93 |
| - this->params()), |
94 |
| - INT_MEMBER(matcher_min_examples_for_prototyping, 3, |
95 |
| - "Reliable Config Threshold", this->params()), |
96 |
| - INT_MEMBER(matcher_sufficient_examples_for_prototyping, 5, |
97 |
| - "Enable adaption even if the ambiguities have not been seen", |
98 |
| - this->params()), |
99 |
| - double_MEMBER(matcher_clustering_max_angle_delta, 0.015, |
100 |
| - "Maximum angle delta for prototype clustering", |
| 139 | + BOOL_MEMBER(matcher_debug_separate_windows, FALSE, |
| 140 | + "Use two different windows for debugging the matching: " |
| 141 | + "One for the protos and one for the features.", |
101 | 142 | this->params()),
|
102 |
| - double_MEMBER(classify_misfit_junk_penalty, 0.0, |
103 |
| - "Penalty to apply when a non-alnum is vertically out of " |
104 |
| - "its expected textline position", |
105 |
| - this->params()), |
106 |
| - double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()), |
107 |
| - double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", |
108 |
| - this->params()), |
109 |
| - double_MEMBER(tessedit_class_miss_scale, 0.00390625, |
110 |
| - "Scale factor for features not used", this->params()), |
111 |
| - double_MEMBER(classify_adapted_pruning_factor, 2.5, |
112 |
| - "Prune poor adapted results this much worse than best result", |
113 |
| - this->params()), |
114 |
| - double_MEMBER(classify_adapted_pruning_threshold, -1.0, |
115 |
| - "Threshold at which classify_adapted_pruning_factor starts", |
116 |
| - this->params()), |
117 |
| - INT_MEMBER(classify_adapt_proto_threshold, 230, |
118 |
| - "Threshold for good protos during adaptive 0-255", |
119 |
| - this->params()), |
120 |
| - INT_MEMBER(classify_adapt_feature_threshold, 230, |
121 |
| - "Threshold for good features during adaptive 0-255", |
122 |
| - this->params()), |
123 |
| - BOOL_MEMBER(disable_character_fragments, TRUE, |
124 |
| - "Do not include character fragments in the" |
125 |
| - " results of the classifier", this->params()), |
126 |
| - double_MEMBER(classify_character_fragments_garbage_certainty_threshold, |
127 |
| - -3.0, "Exclude fragments that do not look like whole" |
128 |
| - " characters from training and adaption", this->params()), |
129 |
| - BOOL_MEMBER(classify_debug_character_fragments, FALSE, |
130 |
| - "Bring up graphical debugging windows for fragments training", |
131 |
| - this->params()), |
132 |
| - BOOL_MEMBER(matcher_debug_separate_windows, FALSE, |
133 |
| - "Use two different windows for debugging the matching: " |
134 |
| - "One for the protos and one for the features.", this->params()), |
135 |
| - STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning", |
136 |
| - this->params()), |
137 |
| - INT_MEMBER(classify_class_pruner_threshold, 229, |
138 |
| - "Class Pruner Threshold 0-255", this->params()), |
139 |
| - INT_MEMBER(classify_class_pruner_multiplier, 15, |
140 |
| - "Class Pruner Multiplier 0-255: ", this->params()), |
141 |
| - INT_MEMBER(classify_cp_cutoff_strength, 7, |
142 |
| - "Class Pruner CutoffStrength: ", this->params()), |
143 |
| - INT_MEMBER(classify_integer_matcher_multiplier, 10, |
144 |
| - "Integer Matcher Multiplier 0-255: ", this->params()), |
145 |
| - EnableLearning(true), |
146 |
| - INT_MEMBER(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word", |
147 |
| - this->params()), |
148 |
| - BOOL_MEMBER(classify_bln_numeric_mode, 0, |
149 |
| - "Assume the input is numbers [0-9].", this->params()), |
150 |
| - double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size", |
151 |
| - this->params()), |
152 |
| - double_MEMBER(speckle_rating_penalty, 10.0, |
153 |
| - "Penalty to add to worst rating for noise", this->params()), |
154 |
| - shape_table_(NULL), |
155 |
| - dict_(this), |
156 |
| - static_classifier_(NULL) { |
| 143 | + STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning", |
| 144 | + this->params()), |
| 145 | + INT_MEMBER(classify_class_pruner_threshold, 229, |
| 146 | + "Class Pruner Threshold 0-255", this->params()), |
| 147 | + INT_MEMBER(classify_class_pruner_multiplier, 15, |
| 148 | + "Class Pruner Multiplier 0-255: ", this->params()), |
| 149 | + INT_MEMBER(classify_cp_cutoff_strength, 7, |
| 150 | + "Class Pruner CutoffStrength: ", this->params()), |
| 151 | + INT_MEMBER(classify_integer_matcher_multiplier, 10, |
| 152 | + "Integer Matcher Multiplier 0-255: ", this->params()), |
| 153 | + EnableLearning(true), |
| 154 | + INT_MEMBER(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word", |
| 155 | + this->params()), |
| 156 | + BOOL_MEMBER(classify_bln_numeric_mode, 0, |
| 157 | + "Assume the input is numbers [0-9].", this->params()), |
| 158 | + double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size", |
| 159 | + this->params()), |
| 160 | + double_MEMBER(speckle_rating_penalty, 10.0, |
| 161 | + "Penalty to add to worst rating for noise", this->params()), |
| 162 | + shape_table_(NULL), |
| 163 | + dict_(this), |
| 164 | + static_classifier_(NULL) { |
157 | 165 | fontinfo_table_.set_compare_callback(
|
158 | 166 | NewPermanentTessCallback(CompareFontInfo));
|
159 | 167 | fontinfo_table_.set_clear_callback(
|
|
0 commit comments