23
23
#include " unicharset_training_utils.h"
24
24
25
25
STRING_PARAM_FLAG (input_unicharset, " " ,
26
- " Unicharset to complete and use in encoding" );
26
+ " Filename with unicharset to complete and use in encoding" );
27
27
STRING_PARAM_FLAG (script_dir, " " ,
28
28
" Directory name for input script unicharsets" );
29
29
STRING_PARAM_FLAG (words, " " ,
@@ -40,26 +40,17 @@ BOOL_PARAM_FLAG(pass_through_recoder, false,
40
40
" unicharset. Otherwise, potentially a compression of it" );
41
41
42
42
int main (int argc, char ** argv) {
43
+ // Sets properties on the input unicharset file, and writes:
44
+ // rootdir/lang/lang.charset_size=ddd.txt
45
+ // rootdir/lang/lang.traineddata
46
+ // rootdir/lang/lang.unicharset
47
+ // If the 3 word lists are provided, the dawgs are also added
48
+ // to the traineddata file.
49
+ // The output unicharset and charset_size files are just for
50
+ // human readability.
43
51
tesseract::CheckSharedLibraryVersion ();
44
52
tesseract::ParseCommandLineFlags (argv[0 ], &argc, &argv, true );
45
53
46
- // Check validity of input flags.
47
- if (FLAGS_input_unicharset.empty () || FLAGS_script_dir.empty () ||
48
- FLAGS_output_dir.empty () || FLAGS_lang.empty ()) {
49
- tprintf (" Usage: %s --input_unicharset filename --script_dir dirname\n " ,
50
- argv[0 ]);
51
- tprintf (" --output_dir rootdir --lang lang [--lang_is_rtl]\n " );
52
- tprintf (" [--words file --puncs file --numbers file]\n " );
53
- tprintf (" Sets properties on the input unicharset file, and writes:\n " );
54
- tprintf (" rootdir/lang/lang.charset_size=ddd.txt\n " );
55
- tprintf (" rootdir/lang/lang.traineddata\n " );
56
- tprintf (" rootdir/lang/lang.unicharset\n " );
57
- tprintf (" If the 3 word lists are provided, the dawgs are also added to" );
58
- tprintf (" the traineddata file.\n " );
59
- tprintf (" The output unicharset and charset_size files are just for human" );
60
- tprintf (" readability.\n " );
61
- exit (1 );
62
- }
63
54
GenericVector<STRING> words, puncs, numbers;
64
55
// If these reads fail, we get a warning message and an empty list of words.
65
56
tesseract::ReadFile (FLAGS_words.c_str (), nullptr ).split (' \n ' , &words);
0 commit comments