We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 1b0379c, commit fb359fc (Copy full SHA for fb359fc)
training/unicharset_extractor.cpp
@@ -50,7 +50,9 @@ static void AddStringsToUnicharset(const GenericVector<STRING>& strings,
50
/*report_errors*/ true,
51
strings[i].string(), &normalized)) {
52
for (const string& normed : normalized) {
53
- if (normed.empty() || IsWhitespace(normed[0])) continue;
+
54
+ // normed is a UTF-8 encoded string
55
+ if (normed.empty() || IsUTF8Whitespace(normed.c_str())) continue;
56
unicharset->unichar_insert(normed.c_str());
57
}
58
} else {
0 commit comments