-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathbaseapi.cpp.patch
110 lines (106 loc) · 3.65 KB
/
baseapi.cpp.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
--- api/baseapi.cpp 2014-01-11 22:18:08.670604608 -0500
+++ /tmp/change/baseapi.cpp 2014-01-11 22:17:22.938602714 -0500
@@ -1891,26 +1891,29 @@
}
/** Sets Dict::letter_is_okay_ function to point to the given function. */
+/*
void TessBaseAPI::SetDictFunc(DictFunc f) {
if (tesseract_ != NULL) {
tesseract_->getDict().letter_is_okay_ = f;
}
}
+*/
/**
* Sets Dict::probability_in_context_ function to point to the given
* function.
*/
+/*
void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) {
if (tesseract_ != NULL) {
tesseract_->getDict().probability_in_context_ = f;
- // Set it for the sublangs too.
int num_subs = tesseract_->num_sub_langs();
for (int i = 0; i < num_subs; ++i) {
tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f;
}
}
}
+*/
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) {
@@ -2375,7 +2378,17 @@
* and the bounding boxes of the characters.
*/
static void extract_result(TESS_CHAR_IT* out,
- PAGE_RES* page_res) {
+ PAGE_RES* page_res,
+ char** char_4_coords,
+ int** char_x0,
+ int** char_y0,
+ int** char_x1,
+ int** char_y1,
+ int chars_length) {
+
+ char buf[100];
+ int cnt = 0;
+
PAGE_RES_IT page_res_it(page_res);
int word_count = 0;
while (page_res_it.word() != NULL) {
@@ -2384,17 +2397,36 @@
const char *len = word->best_choice->unichar_lengths().string();
TBOX real_rect = word->word->bounding_box();
- if (word_count)
+ if (word_count) {
add_space(out);
+ strcpy((char_4_coords)[cnt]," ");
+ (*char_x0)[cnt] = 0;
+ (*char_y0)[cnt] = 0;
+ (*char_x1)[cnt] = 0;
+ (*char_y1)[cnt] = 0;
+ cnt++;
+ }//if
int n = strlen(len);
for (int i = 0; i < n; i++) {
TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
str, *len);
tc->box = real_rect.intersection(word->box_word->BlobBox(i));
+
+ //get unicode representation
+ strncpy(char_4_coords[cnt], tc->unicode_repr, tc->length);
+ char_4_coords[cnt][tc->length] = '\0';
+ (*char_x0)[cnt] = tc->box.left();
+ (*char_y0)[cnt] = tc->box.bottom();
+ (*char_x1)[cnt] = tc->box.right();
+ (*char_y1)[cnt] = tc->box.top();
+
out->add_after_then_move(tc);
str += *len;
+ cnt++;
len++;
}
+
+ strcpy((char_4_coords)[cnt], "");
page_res_it.forward();
word_count++;
}
@@ -2411,10 +2443,17 @@
int** y0,
int** x1,
int** y1,
- PAGE_RES* page_res) {
+ char** char_4_coords,
+ int** char_x0,
+ int** char_y0,
+ int** char_x1,
+ int** char_y1,
+ PAGE_RES* page_res,
+ int chars_length) {
TESS_CHAR_LIST tess_chars;
TESS_CHAR_IT tess_chars_it(&tess_chars);
- extract_result(&tess_chars_it, page_res);
+ extract_result(&tess_chars_it, page_res, char_4_coords,
+ char_x0, char_y0, char_x1, char_y1, chars_length);
tess_chars_it.move_to_first();
int n = tess_chars.length();
int text_len = 0;