Partially fix and enable more unittests

Shreeshrii · Shreeshrii · commit 1ac76d882514 · 2019-01-27T06:49:57.000Z
Add more subtests to langmodel_test

Add more subtests to langmodel_test

fix and enable lstmtrainer_test

fix and enable some subtests from recodebeam_test

partial fix for resultiterator_test

fix typo removing the terminating linefeed.

fix typo

changes
diff --git a/unittest/Makefile.am b/unittest/Makefile.am
@@ -4,6 +4,7 @@ LANGDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/langdata_lstm
 # Absolute path of directory 'tessdata' with traineddata files
 # (must be on same level as top source directory).
 TESSDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/tessdata
+TESSDATA_BEST_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/tessdata_best
 
 # Absolute path of directory 'testing' with test images and ground truth texts
 # (using submodule test).
@@ -15,6 +16,7 @@ TESTDATA_DIR=$(shell cd $(top_srcdir) && pwd)/test/testdata
 AM_CPPFLAGS += -DTESSBIN_DIR="\"$(abs_top_builddir)\""
 AM_CPPFLAGS += -DLANGDATA_DIR="\"$(LANGDATA_DIR)\""
 AM_CPPFLAGS += -DTESSDATA_DIR="\"$(TESSDATA_DIR)\""
+AM_CPPFLAGS += -DTESSDATA_BEST_DIR="\"$(TESSDATA_BEST_DIR)\""
 AM_CPPFLAGS += -DTESTING_DIR="\"$(TESTING_DIR)\""
 AM_CPPFLAGS += -DTESTDATA_DIR="\"$(TESTDATA_DIR)\""
 AM_CPPFLAGS += -DPANGO_ENABLE_ENGINE
@@ -98,6 +100,8 @@ AM_CPPFLAGS +=   -isystem $(top_srcdir)/googletest/googletest/include \
                  -isystem $(top_srcdir)/googletest/googlemock/include
 
 check_PROGRAMS = \
+resultiterator_test \
+recodebeam_test \
   apiexample_test \
   applybox_test \
   baseapi_test \
@@ -137,6 +141,7 @@ check_PROGRAMS += commandlineflags_test
 check_PROGRAMS += lstm_recode_test
 check_PROGRAMS += lstm_squashed_test
 check_PROGRAMS += lstm_test
+check_PROGRAMS += lstmtrainer_test
 check_PROGRAMS += unichar_test
 check_PROGRAMS += unicharcompress_test
 check_PROGRAMS += unicharset_test
@@ -228,6 +233,9 @@ lstm_squashed_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS) $(TRAINING_
 lstm_test_SOURCES = lstm_test.cc
 lstm_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS) $(TRAINING_LIBS)
 
+lstmtrainer_test_SOURCES = lstmtrainer_test.cc
+lstmtrainer_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
+
 mastertrainer_test_SOURCES = mastertrainer_test.cc
 mastertrainer_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS)
 
@@ -253,9 +261,15 @@ progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)
 qrsequence_test_SOURCES = qrsequence_test.cc
 qrsequence_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS)
 
+recodebeam_test_SOURCES = recodebeam_test.cc
+recodebeam_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
+
 rect_test_SOURCES = rect_test.cc
 rect_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
 
+resultiterator_test_SOURCES = resultiterator_test.cc
+resultiterator_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
+
 shapetable_test_SOURCES = shapetable_test.cc
 shapetable_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS)
 
diff --git a/unittest/include_gunit.h b/unittest/include_gunit.h
@@ -54,6 +54,7 @@ class file : public tesseract::File {
 #define CHECK(test) ASSERT_HOST(test)
 #define CHECK_GT(test, value) ASSERT_HOST((test) > (value))
 #define CHECK_LT(test, value) ASSERT_HOST((test) < (value))
+#define CHECK_LE(test, value) ASSERT_HOST((test) <= (value))
 #define CHECK_OK(test) ASSERT_HOST(test)
 
 #endif  // TESSERACT_UNITTEST_INCLUDE_GUNIT_H_
diff --git a/unittest/lang_model_test.cc b/unittest/lang_model_test.cc
@@ -43,7 +43,7 @@ TEST(LangModelTest, AddACharacter) {
   EXPECT_TRUE(unicharset.load_from_file(unicharset_path.c_str()));
   std::string version_str = "TestVersion";
   std::string output_dir = FLAGS_test_tmpdir;
-  LOG(INFO) << "Output dir=" << output_dir;
+  LOG(INFO) << "Output dir=" << output_dir << "\n";
   std::string lang1 = "eng";
   bool pass_through_recoder = false;
   GenericVector<STRING> words, puncs, numbers;
@@ -61,13 +61,16 @@ TEST(LangModelTest, AddACharacter) {
   EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, version_str, output_dir,
                                 lang1, pass_through_recoder, words, puncs,
                                 numbers, lang_is_rtl, nullptr, nullptr));
-  // Init a trainer with it, and encode a string.
+  // Init a trainer with it, and encode kTestString.
   std::string traineddata1 =
       file::JoinPath(output_dir, lang1, absl::StrCat(lang1, ".traineddata"));
   LSTMTrainer trainer1;
   trainer1.InitCharSet(traineddata1);
   GenericVector<int> labels1;
   EXPECT_TRUE(trainer1.EncodeString(kTestString, &labels1));
+  STRING test1_decoded = trainer1.DecodeLabels(labels1);
+  std::string test1_str(&test1_decoded[0], test1_decoded.length());
+  LOG(INFO) << "Labels1=" << test1_str << "\n";
 
   // Add a new character to the unicharset and try again.
   int size_before = unicharset.size();
@@ -81,13 +84,113 @@ TEST(LangModelTest, AddACharacter) {
             CombineLangModel(unicharset, script_dir, version_str, output_dir,
                              lang2, pass_through_recoder, words, puncs, numbers,
                              lang_is_rtl, nullptr, nullptr));
-  // Init a trainer with it, and encode a string.
+  // Init a trainer with it, and encode kTestString.
   std::string traineddata2 =
       file::JoinPath(output_dir, lang2, absl::StrCat(lang2, ".traineddata"));
   LSTMTrainer trainer2;
   trainer2.InitCharSet(traineddata2);
   GenericVector<int> labels2;
   EXPECT_TRUE(trainer2.EncodeString(kTestString, &labels2));
+  STRING test2_decoded = trainer2.DecodeLabels(labels2);
+  std::string test2_str(&test2_decoded[0], test2_decoded.length());
+  LOG(INFO) << "Labels2=" << test2_str << "\n";
+  // encode kTestStringRupees.
+  GenericVector<int> labels3;
+  EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels3));
+  STRING test3_decoded = trainer2.DecodeLabels(labels3);
+  std::string test3_str(&test3_decoded[0], test3_decoded.length());
+  LOG(INFO) << "labels3=" << test3_str << "\n";
+  // Copy labels1 to a std::vector, renumbering the null char to match trainer2.
+  // Since TensorFlow's CTC implementation insists on having the null be the
+  // last label, and we want to be compatible, null has to be renumbered when
+  // we add a class.
+  int null1 = trainer1.null_char();
+  int null2 = trainer2.null_char();
+  EXPECT_EQ(null1 + 1, null2);
+  std::vector<int> labels1_v(labels1.size());
+  for (int i = 0; i < labels1.size(); ++i) {
+    if (labels1[i] == null1)
+      labels1_v[i] = null2;
+    else
+      labels1_v[i] = labels1[i];
+  }
+  EXPECT_THAT(labels1_v,
+              testing::ElementsAreArray(&labels2[0], labels2.size()));
+  // To make sure we are not cheating somehow, we can now encode the Rupee
+  // symbol, which we could not do before.
+  EXPECT_FALSE(trainer1.EncodeString(kTestStringRupees, &labels1));
+  EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels2));
+}
+
+// Same as above test, for hin instead of eng
+TEST(LangModelTest, AddACharacterHindi) {
+  constexpr char kTestString[] = "हिन्दी में एक लाइन लिखें";
+  constexpr char kTestStringRupees[] = "हिंदी में रूपये का चिन्ह प्रयोग करें ₹१००.००";
+  // Setup the arguments.
+  std::string script_dir = LANGDATA_DIR;
+  std::string hin_dir = file::JoinPath(script_dir, "hin");
+  std::string unicharset_path = TestDataNameToPath("hin_beam.unicharset");
+  UNICHARSET unicharset;
+  EXPECT_TRUE(unicharset.load_from_file(unicharset_path.c_str()));
+  std::string version_str = "TestVersion";
+  std::string output_dir = FLAGS_test_tmpdir;
+  LOG(INFO) << "Output dir=" << output_dir << "\n";
+  std::string lang1 = "hin";
+  bool pass_through_recoder = false;
+  GenericVector<STRING> words, puncs, numbers;
+  // If these reads fail, we get a warning message and an empty list of words.
+  ReadFile(file::JoinPath(hin_dir, "hin.wordlist"), nullptr)
+      .split('\n', &words);
+  EXPECT_GT(words.size(), 0);
+  ReadFile(file::JoinPath(hin_dir, "hin.punc"), nullptr).split('\n', &puncs);
+  EXPECT_GT(puncs.size(), 0);
+  ReadFile(file::JoinPath(hin_dir, "hin.numbers"), nullptr)
+      .split('\n', &numbers);
+  EXPECT_GT(numbers.size(), 0);
+  bool lang_is_rtl = false;
+  // Generate the traineddata file.
+  EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, version_str, output_dir,
+                                lang1, pass_through_recoder, words, puncs,
+                                numbers, lang_is_rtl, nullptr, nullptr));
+  // Init a trainer with it, and encode kTestString.
+  std::string traineddata1 =
+      file::JoinPath(output_dir, lang1, absl::StrCat(lang1, ".traineddata"));
+  LSTMTrainer trainer1;
+  trainer1.InitCharSet(traineddata1);
+  GenericVector<int> labels1;
+  EXPECT_TRUE(trainer1.EncodeString(kTestString, &labels1));
+  STRING test1_decoded = trainer1.DecodeLabels(labels1);
+  std::string test1_str(&test1_decoded[0], test1_decoded.length());
+  LOG(INFO) << "Labels1=" << test1_str << "\n";
+
+  // Add a new character to the unicharset and try again.
+  int size_before = unicharset.size();
+  unicharset.unichar_insert("₹");
+  SetupBasicProperties(/*report_errors*/ true, /*decompose (NFD)*/ false,
+                       &unicharset);
+  EXPECT_EQ(size_before + 1, unicharset.size());
+  // Generate the traineddata file.
+  std::string lang2 = "extendedhin";
+  EXPECT_EQ(EXIT_SUCCESS,
+            CombineLangModel(unicharset, script_dir, version_str, output_dir,
+                             lang2, pass_through_recoder, words, puncs, numbers,
+                             lang_is_rtl, nullptr, nullptr));
+  // Init a trainer with it, and encode kTestString.
+  std::string traineddata2 =
+      file::JoinPath(output_dir, lang2, absl::StrCat(lang2, ".traineddata"));
+  LSTMTrainer trainer2;
+  trainer2.InitCharSet(traineddata2);
+  GenericVector<int> labels2;
+  EXPECT_TRUE(trainer2.EncodeString(kTestString, &labels2));
+  STRING test2_decoded = trainer2.DecodeLabels(labels2);
+  std::string test2_str(&test2_decoded[0], test2_decoded.length());
+  LOG(INFO) << "Labels2=" << test2_str << "\n";
+  // encode kTestStringRupees.
+  GenericVector<int> labels3;
+  EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels3));
+  STRING test3_decoded = trainer2.DecodeLabels(labels3);
+  std::string test3_str(&test3_decoded[0], test3_decoded.length());
+  LOG(INFO) << "labels3=" << test3_str << "\n";
   // Copy labels1 to a std::vector, renumbering the null char to match trainer2.
   // Since Tensor Flow's CTC implementation insists on having the null be the
   // last label, and we want to be compatible, null has to be renumbered when
diff --git a/unittest/lstm_recode_test.cc b/unittest/lstm_recode_test.cc
@@ -19,7 +19,7 @@ namespace tesseract {
 TEST_F(LSTMTrainerTest, RecodeTestKorBase) {
   // A basic single-layer, bi-di 1d LSTM on Korean.
   SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-full", "kor/kor.unicharset",
-               "kor.Arial_Unicode_MS.exp0.lstmf", false, true, 5e-4, false);
+               "kor.Arial_Unicode_MS.exp0.lstmf", false, true, 5e-4, false, "kor");
   double kor_full_err = TrainIterations(kTrainerIterations * 2);
   EXPECT_LT(kor_full_err, 88);
 //  EXPECT_GT(kor_full_err, 85);
@@ -29,7 +29,7 @@ TEST_F(LSTMTrainerTest, RecodeTestKorBase) {
 TEST_F(LSTMTrainerTest, RecodeTestKor) {
   // A basic single-layer, bi-di 1d LSTM on Korean.
   SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-recode", "kor/kor.unicharset",
-               "kor.Arial_Unicode_MS.exp0.lstmf", true, true, 5e-4, false);
+               "kor.Arial_Unicode_MS.exp0.lstmf", true, true, 5e-4, false, "kor");
   double kor_recode_err = TrainIterations(kTrainerIterations);
   EXPECT_LT(kor_recode_err, 60);
   LOG(INFO) << "********** Expected  < 60 ************\n" ;
diff --git a/unittest/lstm_squashed_test.cc b/unittest/lstm_squashed_test.cc
@@ -22,7 +22,7 @@ TEST_F(LSTMTrainerTest, TestSquashed) {
   // recoding on, adam on.
   SetupTrainerEng("[1,32,0,1 Ct3,3,16 Mp3,3 Lfys48 Lbx96 O1c1]",
                   "SQU-2-layer-lstm", /*recode*/ true, /*adam*/ true);
-  double lstm_2d_err = TrainIterations(kTrainerIterations * 2);
+  double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2);
   EXPECT_LT(lstm_2d_err, 80);
   LOG(INFO) << "********** < 80 ************\n" ;
   TestIntMode(kTrainerIterations);
diff --git a/unittest/lstm_test.cc b/unittest/lstm_test.cc
@@ -32,7 +32,7 @@ TEST_F(LSTMTrainerTest, BasicTest) {
       "[1,32,0,1 Ct5,5,16 Mp4,4 Ct1,1,16 Ct3,3,128 Mp4,1 Ct1,1,64 S2,1 "
       "Ct1,1,64O1c1]",
       "no-lstm", "eng/eng.unicharset", "eng.Arial.exp0.lstmf", false, false,
-      2e-4, false);
+      2e-4, false, "eng");
   double non_lstm_err = TrainIterations(kTrainerIterations * 4);
   EXPECT_LT(non_lstm_err, 98);
   LOG(INFO) << "********** Expected  < 98 ************\n" ;
diff --git a/unittest/lstm_test.h b/unittest/lstm_test.h
@@ -50,17 +50,25 @@ class LSTMTrainerTest : public testing::Test {
     return file::JoinPath(TESTDATA_DIR,
                           "" + name);
   }
-
+  std::string TessDataNameToPath(const std::string& name) {
+    return file::JoinPath(TESSDATA_DIR,
+                          "" + name);
+  }
+  std::string TestingNameToPath(const std::string& name) {
+    return file::JoinPath(TESTING_DIR,
+                          "" + name);
+  }
+  
   void SetupTrainerEng(const std::string& network_spec, const std::string& model_name,
                        bool recode, bool adam) {
     SetupTrainer(network_spec, model_name, "eng/eng.unicharset",
-                 "eng.Arial.exp0.lstmf", recode, adam, 5e-4, false);
+                 "eng.Arial.exp0.lstmf", recode, adam, 5e-4, false, "eng");
   }
   void SetupTrainer(const std::string& network_spec, const std::string& model_name,
                     const std::string& unicharset_file, const std::string& lstmf_file,
                     bool recode, bool adam, double learning_rate,
-                    bool layer_specific) {
-    constexpr char kLang[] = "eng";  // Exact value doesn't matter.
+                    bool layer_specific, const std::string& kLang) {
+//    constexpr char kLang[] = "eng";  // Exact value doesn't matter.
     std::string unicharset_name = TestDataNameToPath(unicharset_file);
     UNICHARSET unicharset;
     ASSERT_TRUE(unicharset.load_from_file(unicharset_name.c_str(), false));
@@ -76,7 +84,7 @@ class LSTMTrainerTest : public testing::Test {
                                    model_path.c_str(), checkpoint_path.c_str(),
                                    0, 0));
     trainer_->InitCharSet(file::JoinPath(FLAGS_test_tmpdir, kLang,
-                                         absl::StrCat(kLang, ".traineddata")));
+    absl::StrCat(kLang, ".traineddata")));
     int net_mode = adam ? NF_ADAM : 0;
     // Adam needs a higher learning rate, due to not multiplying the effective
     // rate by 1/(1-momentum).
@@ -157,9 +165,9 @@ class LSTMTrainerTest : public testing::Test {
   // string.
   void TestEncodeDecode(const std::string& lang, const std::string& str, bool recode) {
     std::string unicharset_name = lang + "/" + lang + ".unicharset";
-	std::string lstmf_name = lang +  ".Arial_Unicode_MS.exp0.lstmf";
+    std::string lstmf_name = lang +  ".Arial_Unicode_MS.exp0.lstmf";
     SetupTrainer("[1,1,0,32 Lbx100 O1c1]", "bidi-lstm", unicharset_name,
-                 lstmf_name, recode, true, 5e-4, true);
+                 lstmf_name, recode, true, 5e-4, true, lang);
     GenericVector<int> labels;
     EXPECT_TRUE(trainer_->EncodeString(str.c_str(), &labels));
     STRING decoded = trainer_->DecodeLabels(labels);
diff --git a/unittest/lstmtrainer_test.cc b/unittest/lstmtrainer_test.cc
@@ -1,6 +1,17 @@
-#include "leptonica/include/allheaders.h"
-#include "tesseract/api/baseapi.h"
-#include "tesseract/unittest/lstm_test.h"
+// (C) Copyright 2017, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "allheaders.h"
+#include "baseapi.h"
+#include "lstm_test.h"
 
 namespace tesseract {
 namespace {
@@ -21,16 +32,17 @@ TEST_F(LSTMTrainerTest, EncodesKor) {
 
 TEST_F(LSTMTrainerTest, MapCoder) {
   LSTMTrainer fra_trainer;
-  fra_trainer.InitCharSet(TestDataNameToPath("fra.traineddata"));
+  fra_trainer.InitCharSet(TestDataNameToPath("fra/fra.traineddata"));
   LSTMTrainer deu_trainer;
-  deu_trainer.InitCharSet(TestDataNameToPath("deu.traineddata"));
+  deu_trainer.InitCharSet(TestDataNameToPath("deu/deu.traineddata"));
   // A string that uses characters common to French and German.
-  string kTestStr = "The quick brown 'fox' jumps over: the lazy dog!";
+  std::string kTestStr = "The quick brown 'fox' jumps over: the lazy dog!";
   GenericVector<int> deu_labels;
   EXPECT_TRUE(deu_trainer.EncodeString(kTestStr.c_str(), &deu_labels));
   // The french trainer cannot decode them correctly.
   STRING badly_decoded = fra_trainer.DecodeLabels(deu_labels);
-  string bad_str(&badly_decoded[0], badly_decoded.length());
+  std::string bad_str(&badly_decoded[0], badly_decoded.length());
+  LOG(INFO) << "bad_str fra=" << bad_str << "\n";
   EXPECT_NE(kTestStr, bad_str);
   // Encode the string as fra.
   GenericVector<int> fra_labels;
@@ -49,7 +61,8 @@ TEST_F(LSTMTrainerTest, MapCoder) {
   }
   // The german trainer can now decode them correctly.
   STRING decoded = deu_trainer.DecodeLabels(mapped_fra_labels);
-  string ok_str(&decoded[0], decoded.length());
+  std::string ok_str(&decoded[0], decoded.length());
+  LOG(INFO) << "ok_str deu=" << ok_str << "\n";
   EXPECT_EQ(kTestStr, ok_str);
 }
 
@@ -58,29 +71,32 @@ TEST_F(LSTMTrainerTest, MapCoder) {
 TEST_F(LSTMTrainerTest, ConvertModel) {
   // Setup a trainer with a deu charset.
   LSTMTrainer deu_trainer;
-  deu_trainer.InitCharSet(TestDataNameToPath("deu.traineddata"));
+  deu_trainer.InitCharSet(TestDataNameToPath("deu/deu.traineddata"));
   // Load the fra traineddata, strip out the model, and save to a tmp file.
   TessdataManager mgr;
-  string fra_data =
-      file::JoinPath(FLAGS_test_srcdir, "tessdata_best", "fra.traineddata");
-  CHECK(mgr.Init(fra_data.c_str())) << "Failed to load " << fra_data;
-  string model_path = file::JoinPath(FLAGS_test_tmpdir, "fra.lstm");
+  std::string fra_data =
+      file::JoinPath(TESSDATA_BEST_DIR, "fra.traineddata");
+  CHECK(mgr.Init(fra_data.c_str()));
+  LOG(INFO) << "Load " << fra_data  << "\n";
+  std::string model_path = file::JoinPath(FLAGS_test_tmpdir, "fra.lstm");
   CHECK(mgr.ExtractToFile(model_path.c_str()));
+  LOG(INFO) << "Extract " << model_path << "\n";
   // Load the fra model into the deu_trainer, and save the converted model.
-  CHECK(deu_trainer.TryLoadingCheckpoint(model_path.c_str(), fra_data.c_str()))
-      << "Failed checkpoint load for " << model_path << " and " << fra_data;
-  string deu_data = file::JoinPath(FLAGS_test_tmpdir, "deu.traineddata");
+  CHECK(deu_trainer.TryLoadingCheckpoint(model_path.c_str(), fra_data.c_str()));
+  LOG(INFO) << "Checkpoint load for " << model_path << " and " << fra_data << "\n";
+  std::string deu_data = file::JoinPath(FLAGS_test_tmpdir, "deu.traineddata");
   CHECK(deu_trainer.SaveTraineddata(deu_data.c_str()));
+  LOG(INFO) << "Save " << deu_data << "\n";
   // Now run the saved model on phototest. (See BasicTesseractTest in
   // baseapi_test.cc).
   TessBaseAPI api;
-  api.Init(FLAGS_test_tmpdir.c_str(), "deu", tesseract::OEM_LSTM_ONLY);
-  Pix* src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
+  api.Init(FLAGS_test_tmpdir, "deu", tesseract::OEM_LSTM_ONLY);
+  Pix* src_pix = pixRead(TestingNameToPath("phototest.tif").c_str());
   CHECK(src_pix);
   api.SetImage(src_pix);
   std::unique_ptr<char[]> result(api.GetUTF8Text());
-  string truth_text;
-  CHECK_OK(file::GetContents(TestDataNameToPath("phototest.gold.txt"),
+  std::string truth_text;
+  CHECK_OK(file::GetContents(TestingNameToPath("phototest.gold.txt"),
                              &truth_text, file::Defaults()));
 
   EXPECT_STREQ(truth_text.c_str(), result.get());
diff --git a/unittest/recodebeam_test.cc b/unittest/recodebeam_test.cc
diff --git a/unittest/resultiterator_test.cc b/unittest/resultiterator_test.cc