@@ -31,41 +31,24 @@ UNICHARMAP::~UNICHARMAP() {
31
31
delete[] nodes;
32
32
}
33
33
34
- // Search the given unichar representation in the tree. Each character in the
35
- // string is interpreted as an index in an array of nodes.
36
- UNICHAR_ID UNICHARMAP::unichar_to_id (const char * const unichar_repr) const {
37
- const char * current_char = unichar_repr;
38
- UNICHARMAP_NODE* current_nodes = nodes;
39
-
40
- assert (*unichar_repr != ' \0 ' );
41
-
42
- do {
43
- if (*(current_char + 1 ) == ' \0 ' )
44
- return current_nodes[static_cast <unsigned char >(*current_char)].id ;
45
- current_nodes =
46
- current_nodes[static_cast <unsigned char >(*current_char)].children ;
47
- ++current_char;
48
- } while (true );
49
- }
50
-
51
34
// Search the given unichar representation in the tree, using length characters
52
35
// from it maximum. Each character in the string is interpreted as an index in
53
36
// an array of nodes.
54
37
UNICHAR_ID UNICHARMAP::unichar_to_id (const char * const unichar_repr,
55
38
int length) const {
56
- const char * current_char = unichar_repr;
57
39
UNICHARMAP_NODE* current_nodes = nodes;
58
40
59
41
assert (*unichar_repr != ' \0 ' );
60
42
assert (length > 0 && length <= UNICHAR_LEN);
61
43
44
+ int index = 0 ;
45
+ if (index >= length || unichar_repr[index ] == ' \0 ' ) return INVALID_UNICHAR_ID;
62
46
do {
63
- if (length == 1 || *(current_char + 1 ) == ' \0 ' )
64
- return current_nodes[static_cast <unsigned char >(*current_char )].id ;
47
+ if (index + 1 >= length || unichar_repr[ index + 1 ] == ' \0 ' )
48
+ return current_nodes[static_cast <unsigned char >(unichar_repr[ index ] )].id ;
65
49
current_nodes =
66
- current_nodes[static_cast <unsigned char >(*current_char)].children ;
67
- ++current_char;
68
- --length;
50
+ current_nodes[static_cast <unsigned char >(unichar_repr[index ])].children ;
51
+ ++index ;
69
52
} while (true );
70
53
}
71
54
@@ -75,15 +58,12 @@ UNICHAR_ID UNICHARMAP::unichar_to_id(const char* const unichar_repr,
75
58
// string is interpreted as an index in an array of nodes.
76
59
void UNICHARMAP::insert (const char * const unichar_repr, UNICHAR_ID id) {
77
60
const char * current_char = unichar_repr;
61
+ if (*current_char == ' \0 ' ) return ;
78
62
UNICHARMAP_NODE** current_nodes_pointer = &nodes;
79
-
80
- assert (*unichar_repr != ' \0 ' );
81
- assert (id >= 0 );
82
-
83
63
do {
84
64
if (*current_nodes_pointer == 0 )
85
65
*current_nodes_pointer = new UNICHARMAP_NODE[256 ];
86
- if (*( current_char + 1 ) == ' \0 ' ) {
66
+ if (current_char[ 1 ] == ' \0 ' ) {
87
67
(*current_nodes_pointer)
88
68
[static_cast <unsigned char >(*current_char)].id = id;
89
69
return ;
@@ -95,24 +75,6 @@ void UNICHARMAP::insert(const char* const unichar_repr, UNICHAR_ID id) {
95
75
} while (true );
96
76
}
97
77
98
- // Search the given unichar representation in the tree. Each character in the
99
- // string is interpreted as an index in an array of nodes. Stop once the tree
100
- // does not have anymore nodes or once we found the right unichar_repr.
101
- bool UNICHARMAP::contains (const char * const unichar_repr) const {
102
- if (unichar_repr == NULL || *unichar_repr == ' \0 ' ) return false ;
103
-
104
- const char * current_char = unichar_repr;
105
- UNICHARMAP_NODE* current_nodes = nodes;
106
-
107
- while (current_nodes != 0 && *(current_char + 1 ) != ' \0 ' ) {
108
- current_nodes =
109
- current_nodes[static_cast <unsigned char >(*current_char)].children ;
110
- ++current_char;
111
- }
112
- return current_nodes != 0 && *(current_char + 1 ) == ' \0 ' &&
113
- current_nodes[static_cast <unsigned char >(*current_char)].id >= 0 ;
114
- }
115
-
116
78
// Search the given unichar representation in the tree, using at most length
117
79
// characters from it. Each character in the string is interpreted as an index in
118
80
// an array of nodes. Stop once the tree does not have any more nodes or once we
@@ -121,24 +83,26 @@ bool UNICHARMAP::contains(const char* const unichar_repr,
121
83
int length) const {
122
84
if (unichar_repr == NULL || *unichar_repr == ' \0 ' ) return false ;
123
85
if (length <= 0 || length > UNICHAR_LEN) return false ;
124
-
125
- const char * current_char = unichar_repr;
86
+ int index = 0 ;
87
+ if ( index >= length || unichar_repr[ index ] == ' \0 ' ) return false ;
126
88
UNICHARMAP_NODE* current_nodes = nodes;
127
89
128
- while (current_nodes != 0 && (length > 1 && *(current_char + 1 ) != ' \0 ' )) {
90
+ while (current_nodes != 0 && index + 1 < length &&
91
+ unichar_repr[index + 1 ] != ' \0 ' ) {
129
92
current_nodes =
130
- current_nodes[static_cast <unsigned char >(*current_char)].children ;
131
- --length;
132
- ++current_char;
93
+ current_nodes[static_cast <unsigned char >(unichar_repr[index ])].children ;
94
+ ++index ;
133
95
}
134
- return current_nodes != 0 && (length == 1 || *(current_char + 1 ) == ' \0 ' ) &&
135
- current_nodes[static_cast <unsigned char >(*current_char)].id >= 0 ;
96
+ return current_nodes != 0 &&
97
+ (index + 1 >= length || unichar_repr[index + 1 ] == ' \0 ' ) &&
98
+ current_nodes[static_cast <unsigned char >(unichar_repr[index ])].id >= 0 ;
136
99
}
137
100
138
101
// Return the minimum number of characters that must be used from this string
139
102
// to obtain a match in the UNICHARMAP.
140
103
int UNICHARMAP::minmatch (const char * const unichar_repr) const {
141
104
const char * current_char = unichar_repr;
105
+ if (*current_char == ' \0 ' ) return 0 ;
142
106
UNICHARMAP_NODE* current_nodes = nodes;
143
107
144
108
while (current_nodes != NULL && *current_char != ' \0 ' ) {
0 commit comments