Skip to content

Commit 0f0eaa9

Browse files
Shreeshrii authored and stweil committed
Partial fix for layout_test and dawg_test
1 parent 633ccbc commit 0f0eaa9

File tree

2 files changed

+50
-44
lines changed

2 files changed

+50
-44
lines changed

unittest/dawg_test.cc

+23 -21
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,13 @@
33
#include <string>
44
#include <vector>
55

6-
#include "util/process/subprocess.h"
6+
#include "ratngs.h"
7+
#include "unicharset.h"
8+
#include "trie.h"
79

8-
#include "tesseract/ccstruct/ratngs.h"
9-
#include "tesseract/ccutil/unicharset.h"
10-
#include "tesseract/dict/trie.h"
10+
#include "include_gunit.h"
11+
#include "base/filelinereader.h"
12+
#include "util/process/subprocess.h"
1113

1214
namespace {
1315

@@ -18,7 +20,7 @@ void RemoveTrailingLineTerminators(char* line) {
1820
}
1921
}
2022

21-
void AddLineToSet(std::set<string>* words, char* line) {
23+
void AddLineToSet(std::set<std::string>* words, char* line) {
2224
RemoveTrailingLineTerminators(line);
2325
words->insert(line);
2426
}
@@ -27,26 +29,26 @@ void AddLineToSet(std::set<string>* words, char* line) {
2729
// aka Directed Acyclic Word Graphs).
2830
class DawgTest : public testing::Test {
2931
protected:
30-
void LoadWordlist(const string& filename, std::set<string>* words) const {
32+
void LoadWordlist(const std::string& filename, std::set<std::string>* words) const {
3133
FileLineReader::Options options;
3234
options.set_comment_char(0);
3335
FileLineReader flr(filename.c_str(), options);
3436
flr.set_line_callback(NewPermanentCallback(AddLineToSet, words));
3537
flr.Reload();
3638
}
37-
string TestDataNameToPath(const string& name) const {
38-
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
39+
std::string TestDataNameToPath(const std::string& name) const {
40+
return file::JoinPath(TESTDATA_DIR, "/" + name);
3941
}
40-
string TessBinaryPath(const string& binary_name) const {
41-
return file::JoinPath(FLAGS_test_srcdir,
42+
std::string TessBinaryPath(const std::string& binary_name) const {
43+
return file::JoinPath(TESS_SRC_DIR,
4244
}
43-
string OutputNameToPath(const string& name) const {
45+
std::string OutputNameToPath(const std::string& name) const {
4446
return file::JoinPath(FLAGS_test_tmpdir, name);
4547
}
46-
int RunCommand(const string& program, const string& arg1, const string& arg2,
47-
const string& arg3) const {
48+
int RunCommand(const std::string& program, const std::string& arg1, const std::string& arg2,
49+
const std::string& arg3) const {
4850
SubProcess p;
49-
std::vector<string> argv;
51+
std::vector<std::string> argv;
5052
argv.push_back(program);
5153
argv.push_back(arg1);
5254
argv.push_back(arg2);
@@ -59,13 +61,13 @@ class DawgTest : public testing::Test {
5961
// Test that we are able to convert a wordlist file (one "word" per line) to
6062
// a dawg (a compressed format) and then extract the original wordlist back
6163
// out using the tools "wordlist2dawg" and "dawg2wordlist."
62-
void TestDawgRoundTrip(const string& unicharset_filename,
63-
const string& wordlist_filename) const {
64-
std::set<string> orig_words, roundtrip_words;
65-
string unicharset = TestDataNameToPath(unicharset_filename);
66-
string orig_wordlist = TestDataNameToPath(wordlist_filename);
67-
string output_dawg = OutputNameToPath(wordlist_filename + ".dawg");
68-
string output_wordlist = OutputNameToPath(wordlist_filename);
64+
void TestDawgRoundTrip(const std::string& unicharset_filename,
65+
const std::string& wordlist_filename) const {
66+
std::set<std::string> orig_words, roundtrip_words;
67+
std::string unicharset = TestDataNameToPath(unicharset_filename);
68+
std::string orig_wordlist = TestDataNameToPath(wordlist_filename);
69+
std::string output_dawg = OutputNameToPath(wordlist_filename + ".dawg");
70+
std::string output_wordlist = OutputNameToPath(wordlist_filename);
6971
LoadWordlist(orig_wordlist, &orig_words);
7072
EXPECT_EQ(
7173
RunCommand("wordlist2dawg", orig_wordlist, output_dawg, unicharset), 0);

unittest/layout_test.cc

+27 -23
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,18 @@
11

22
#include <string>
33
#include <utility>
4-
#include "leptonica/include/allheaders.h"
5-
#include "tesseract/api/baseapi.h"
6-
#include "tesseract/ccmain/mutableiterator.h"
7-
#include "tesseract/ccmain/resultiterator.h"
8-
#include "tesseract/ccstruct/coutln.h"
9-
#include "tesseract/ccstruct/pageres.h"
10-
#include "tesseract/ccstruct/polyblk.h"
11-
#include "tesseract/ccstruct/stepblob.h"
4+
5+
#include "include_gunit.h"
6+
7+
#include "allheaders.h"
8+
#include "baseapi.h"
9+
#include "coutln.h"
10+
#include "log.h" // for LOG
11+
#include "mutableiterator.h"
12+
#include "pageres.h"
13+
#include "polyblk.h"
14+
#include "resultiterator.h"
15+
#include "stepblob.h"
1216

1317
namespace {
1418

@@ -25,11 +29,11 @@ const PolyBlockType kBlocks8087_054[] = {PT_HEADING_TEXT, PT_FLOWING_TEXT,
2529
// The fixture for testing Tesseract.
2630
class LayoutTest : public testing::Test {
2731
protected:
28-
string TestDataNameToPath(const string& name) {
29-
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
32+
std::string TestDataNameToPath(const std::string& name) {
33+
return file::JoinPath(TESTING_DIR, "/" + name);
3034
}
31-
string TessdataPath() {
32-
return file::JoinPath(FLAGS_test_srcdir, "tessdata");
35+
std::string TessdataPath() {
36+
return file::JoinPath(TESSDATA_DIR, "");
3337
}
3438

3539
LayoutTest() { src_pix_ = nullptr; }
@@ -57,21 +61,20 @@ class LayoutTest : public testing::Test {
5761
char* block_text = it->GetUTF8Text(tesseract::RIL_BLOCK);
5862
if (block_text != nullptr && it->BlockType() == blocks[string_index] &&
5963
strstr(block_text, strings[string_index]) != nullptr) {
60-
VLOG(1) << StringPrintf("Found string %s in block %d of type %s",
64+
LOG(INFO) << "Found string %s in block %d of type %s" <<
6165
strings[string_index], block_index,
62-
kPolyBlockNames[blocks[string_index]]);
66+
kPolyBlockNames[blocks[string_index]];
6367
// Found this one.
6468
++string_index;
6569
} else if (it->BlockType() == blocks[string_index] &&
6670
block_text == nullptr && strings[string_index][0] == '\0') {
67-
VLOG(1) << StringPrintf("Found block of type %s at block %d",
71+
LOG(INFO) << "Found block of type %s at block %d" <<
6872
kPolyBlockNames[blocks[string_index]],
69-
block_index);
73+
block_index;
7074
// Found this one.
7175
++string_index;
7276
} else {
73-
VLOG(1) << StringPrintf("No match found in block with text:\n%s",
74-
block_text);
77+
LOG(INFO) << "No match found in block with text:\n%s" << block_text;
7578
}
7679
delete[] block_text;
7780
++block_index;
@@ -97,7 +100,7 @@ class LayoutTest : public testing::Test {
97100
PTIsTextType(it->BlockType()) && right - left > 800 &&
98101
bottom - top > 200) {
99102
if (prev_right > prev_left) {
100-
if (min(right, prev_right) > max(left, prev_left)) {
103+
if (std::min(right, prev_right) > std::max(left, prev_left)) {
101104
EXPECT_GE(top, prev_bottom) << "Overlapping block should be below";
102105
} else if (top < prev_bottom) {
103106
if (right_to_left) {
@@ -156,7 +159,7 @@ class LayoutTest : public testing::Test {
156159
}
157160

158161
Pix* src_pix_;
159-
string ocr_text_;
162+
std::string ocr_text_;
160163
tesseract::TessBaseAPI api_;
161164
};
162165

@@ -173,9 +176,10 @@ TEST_F(LayoutTest, UNLV8087_054) {
173176
}
174177

175178
// Tests that Tesseract gets the important blocks and in the right order
176-
// on a UNLV page numbered 8087_054.3B.tif. (Dubrovnik)
179+
// on GOOGLE:13510798882202548:74:84.sj-79.tif (Hebrew image)
180+
// TODO: replace hebrew.png by Google image referred above
177181
TEST_F(LayoutTest, HebrewOrderingAndSkew) {
178-
SetImage("GOOGLE:13510798882202548:74:84.sj-79.tif", "eng");
182+
SetImage("hebrew.png", "eng");
179183
// Just run recognition.
180184
EXPECT_EQ(api_.Recognize(nullptr), 0);
181185
tesseract::MutableIterator* it = api_.GetMutableIterator();
@@ -184,7 +188,7 @@ TEST_F(LayoutTest, HebrewOrderingAndSkew) {
184188
VerifyTotalContainment(1, it);
185189
delete it;
186190
// Now try again using Hebrew.
187-
SetImage("GOOGLE:13510798882202548:74:84.sj-79.tif", "heb");
191+
SetImage("hebrew.png", "heb");
188192
// Just run recognition.
189193
EXPECT_EQ(api_.Recognize(nullptr), 0);
190194
it = api_.GetMutableIterator();

0 commit comments

Comments
 (0)