@@ -111,7 +111,8 @@ bool ValidateMyanmar::ConsumeOptionalSignsIfPresent() {
111
111
}
112
112
}
113
113
// The following characters are allowed, all optional, and in sequence.
114
- const std::vector<char32> kSigns ({0x1036 , 0x1037 });
114
+ // Anusvara (U+1036), dot below (U+1037), visarga (U+1038).
115
+ const std::vector<char32> kSigns ({0x1036 , 0x1037 , 0x1038 });
115
116
for (char32 ch : kSigns ) {
116
117
if (codes_[codes_used_].second == ch) {
117
118
if (UseMultiCode (1 )) return true ;
@@ -131,29 +132,37 @@ bool ValidateMyanmar::ConsumeOptionalSignsIfPresent() {
131
132
// Returns true if the unicode is a Myanmar "letter" including consonants
132
133
// and independent vowels. Although table 16-3 distinguishes between some
133
134
// base consonants and vowels, the extensions make no such distinction, so we
134
- // put them all into a single bucket.
135
+ // put them all into a single bucket.
136
+ // Update MYANMAR LETTER based on following:
137
+ // https://unicode.org/charts/PDF/U1000.pdf - Myanmar
138
+ // http://unicode.org/charts/PDF/UAA60.pdf - Myanmar Extended-A
139
+ // http://unicode.org/charts/PDF/UA9E0.pdf - Myanmar Extended-B
135
140
/* static */
136
141
bool ValidateMyanmar::IsMyanmarLetter (char32 ch) {
137
142
return (0x1000 <= ch && ch <= 0x102a ) || ch == 0x103f ||
138
143
(0x1050 <= ch && ch <= 0x1055 ) || (0x105a <= ch && ch <= 0x105d ) ||
139
144
ch == 0x1061 || ch == 0x1065 || ch == 0x1066 ||
140
- (0x106e <= ch && ch <= 0x1070 ) || (0x1075 <= ch && ch <= 0x1080 ) ||
141
- ch == 0x108e || (0xa9e0 <= ch && ch <= 0xa9ef ) ||
142
- (0xa9fa <= ch && ch <= 0xa9ff ) || (0xaa60 <= ch && ch <= 0xaa73 ) ||
145
+ (0x106e <= ch && ch <= 0x1070 ) || (0x1075 <= ch && ch <= 0x1081 ) ||
146
+ ch == 0x108e || (0xa9e0 <= ch && ch <= 0xa9e4 ) ||
147
+ (0xa9e7 <= ch && ch <= 0xa9ef ) || (0xa9fa <= ch && ch <= 0xa9fe ) ||
148
+ (0xaa60 <= ch && ch <= 0xaa6f ) || (0xaa71 <= ch && ch <= 0xaa73 ) ||
143
149
ch == 0xaa7a || ch == 0xaa7e || ch == 0xaa7f ;
144
150
}
145
151
146
152
// Returns true if ch is a Myanmar digit or other symbol that does not take
147
- // part in being a syllable.
153
+ // part in being a syllable eg. punctuation marks.
154
+ // MYANMAR DIGIT, MYANMAR SYMBOL, MYANMAR LOGOGRAM
155
+ // REDUPLICATION MARKS
148
156
/* static */
149
157
bool ValidateMyanmar::IsMyanmarOther (char32 ch) {
150
158
IcuErrorCode err;
151
159
UScriptCode script_code = uscript_getScript (ch, err);
152
160
if (script_code != USCRIPT_MYANMAR && ch != Validator::kZeroWidthJoiner &&
153
161
ch != Validator::kZeroWidthNonJoiner )
154
162
return true ;
155
- return (0x1040 <= ch && ch <= 0x1049 ) || (0x1090 <= ch && ch <= 0x1099 ) ||
156
- (0x109c <= ch && ch <= 0x109d ) || (0xa9f0 <= ch && ch <= 0xa9f9 ) ||
163
+ return (0x1040 <= ch && ch <= 0x104f ) || (0x1090 <= ch && ch <= 0x1099 ) ||
164
+ (0x109e <= ch && ch <= 0x109f ) || (0xa9f0 <= ch && ch <= 0xa9f9 ) ||
165
+ ch == 0xa9e6 || ch == 0xaa70 ||
157
166
(0xaa74 <= ch && ch <= 0xaa79 );
158
167
}
159
168
0 commit comments