Skip to content

Commit 83d4433

Browse files
committed
Modernize and format unichar.h
Signed-off-by: Stefan Weil <sw@weilnetz.de>
1 parent ac0b191 commit 83d4433

File tree

2 files changed

+12
-12
lines changed

2 files changed

+12
-12
lines changed

src/ccutil/unichar.cpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
// File: unichar.cpp
33
// Description: Unicode character/ligature class.
44
// Author: Ray Smith
5-
// Created: Wed Jun 28 17:05:01 PDT 2006
65
//
76
// (C) Copyright 2006, Google Inc.
87
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -202,11 +201,11 @@ bool UNICHAR::const_iterator::is_legal() const {
202201
return utf8_step(it_) > 0;
203202
}
204203

205-
UNICHAR::const_iterator UNICHAR::begin(const char* utf8_str, const int len) {
204+
UNICHAR::const_iterator UNICHAR::begin(const char* utf8_str, int len) {
206205
return UNICHAR::const_iterator(utf8_str);
207206
}
208207

209-
UNICHAR::const_iterator UNICHAR::end(const char* utf8_str, const int len) {
208+
UNICHAR::const_iterator UNICHAR::end(const char* utf8_str, int len) {
210209
return UNICHAR::const_iterator(utf8_str + len);
211210
}
212211

src/ccutil/unichar.h

+10-9
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
// File: unichar.h
33
// Description: Unicode character/ligature class.
44
// Author: Ray Smith
5-
// Created: Wed Jun 28 17:05:01 PDT 2006
65
//
76
// (C) Copyright 2006, Google Inc.
87
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -77,7 +76,7 @@ class UNICHAR {
7776
// Get the length of the UTF8 string.
7877
int utf8_len() const {
7978
int len = chars[UNICHAR_LEN - 1];
80-
return len >=0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
79+
return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
8180
}
8281

8382
// Get a UTF8 string, but NOT null-terminated.
@@ -106,13 +105,13 @@ class UNICHAR {
106105
// tprintf("Char = %s\n", buf);
107106
// }
108107
class const_iterator {
109-
using CI = const_iterator ;
108+
using CI = const_iterator;
110109

111110
public:
112111
// Step to the next UTF8 character.
113112
// If the current position is at an illegal UTF8 character, then print an
114-
// error message and step by one byte. If the current position is at a nullptr
115-
// value, don't step past it.
113+
// error message and step by one byte. If the current position is at a
114+
// null character, don't step past it.
116115
const_iterator& operator++();
117116

118117
// Return the UCS-4 value at the current position.
@@ -133,7 +132,9 @@ class UNICHAR {
133132
bool is_legal() const;
134133

135134
// Return the pointer into the string at the current position.
136-
const char* utf8_data() const { return it_; }
135+
const char* utf8_data() const {
136+
return it_;
137+
}
137138

138139
// Iterator equality operators.
139140
friend bool operator==(const CI& lhs, const CI& rhs) {
@@ -153,8 +154,8 @@ class UNICHAR {
153154
// Create a start/end iterator pointing to a string. Note that these methods
154155
// are static and do NOT create a copy or take ownership of the underlying
155156
// array.
156-
static const_iterator begin(const char* utf8_str, const int byte_length);
157-
static const_iterator end(const char* utf8_str, const int byte_length);
157+
static const_iterator begin(const char* utf8_str, int byte_length);
158+
static const_iterator end(const char* utf8_str, int byte_length);
158159

159160
// Converts a utf-8 string to a vector of unicodes.
160161
// Returns an empty vector if the input contains invalid UTF-8.
@@ -167,7 +168,7 @@ class UNICHAR {
167168
// A UTF-8 representation of 1 or more Unicode characters.
168169
// The last element (chars[UNICHAR_LEN - 1]) is a length if
169170
// its value < UNICHAR_LEN, otherwise it is a genuine character.
170-
char chars[UNICHAR_LEN];
171+
char chars[UNICHAR_LEN]{};
171172
};
172173

173174
} // namespace tesseract

0 commit comments

Comments
 (0)