2
2
// File: unichar.h
3
3
// Description: Unicode character/ligature class.
4
4
// Author: Ray Smith
5
- // Created: Wed Jun 28 17:05:01 PDT 2006
6
5
//
7
6
// (C) Copyright 2006, Google Inc.
8
7
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -77,7 +76,7 @@ class UNICHAR {
77
76
// Get the length of the UTF8 string.
78
77
int utf8_len () const {
79
78
int len = chars[UNICHAR_LEN - 1 ];
80
- return len >=0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
79
+ return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
81
80
}
82
81
83
82
// Get a UTF8 string, but NOT null-terminated.
@@ -106,13 +105,13 @@ class UNICHAR {
106
105
// tprintf("Char = %s\n", buf);
107
106
// }
108
107
class const_iterator {
109
- using CI = const_iterator ;
108
+ using CI = const_iterator;
110
109
111
110
public:
112
111
// Step to the next UTF8 character.
113
112
// If the current position is at an illegal UTF8 character, then print an
114
- // error message and step by one byte. If the current position is at a nullptr
115
- // value, don't step past it.
113
+ // error message and step by one byte. If the current position is at a
114
+ // null byte, don't step past it.
116
115
const_iterator& operator ++();
117
116
118
117
// Return the UCS-4 value at the current position.
@@ -133,7 +132,9 @@ class UNICHAR {
133
132
bool is_legal () const ;
134
133
135
134
// Return the pointer into the string at the current position.
136
- const char * utf8_data () const { return it_; }
135
+ const char * utf8_data () const {
136
+ return it_;
137
+ }
137
138
138
139
// Iterator equality operators.
139
140
friend bool operator ==(const CI& lhs, const CI& rhs) {
@@ -153,8 +154,8 @@ class UNICHAR {
153
154
// Create a start/end iterator pointing to a string. Note that these methods
154
155
// are static and do NOT create a copy or take ownership of the underlying
155
156
// array.
156
- static const_iterator begin (const char * utf8_str, const int byte_length);
157
- static const_iterator end (const char * utf8_str, const int byte_length);
157
+ static const_iterator begin (const char * utf8_str, int byte_length);
158
+ static const_iterator end (const char * utf8_str, int byte_length);
158
159
159
160
// Converts a utf-8 string to a vector of unicodes.
160
161
// Returns an empty vector if the input contains invalid UTF-8.
@@ -167,7 +168,7 @@ class UNICHAR {
167
168
// A UTF-8 representation of 1 or more Unicode characters.
168
169
// The last element (chars[UNICHAR_LEN - 1]) is a length if
169
170
// its value < UNICHAR_LEN, otherwise it is a genuine character.
170
- char chars[UNICHAR_LEN];
171
+ char chars[UNICHAR_LEN]{} ;
171
172
};
172
173
173
174
} // namespace tesseract
0 commit comments