Skip to content

Commit 90dc951

Browse files
author
blake2-ppc
committed
std::str: Fix overflow problems in unsafe code
See issue #8742
1 parent e34e203 commit 90dc951

File tree

1 file changed

+59
-105
lines changed

1 file changed

+59
-105
lines changed

src/libstd/str.rs

Lines changed: 59 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@ use char;
2222
use char::Char;
2323
use clone::{Clone, DeepClone};
2424
use container::{Container, Mutable};
25-
use num::Times;
26-
use iter::{Iterator, FromIterator, Extendable};
25+
use iter::{Iterator, FromIterator, Extendable, range};
2726
use iter::{Filter, AdditiveIterator, Map};
2827
use iter::{Invert, DoubleEndedIterator, ExactSize};
2928
use libc;
@@ -33,7 +32,6 @@ use ptr;
3332
use ptr::RawPtr;
3433
use to_str::ToStr;
3534
use uint;
36-
use unstable::raw::{Repr, Slice};
3735
use vec;
3836
use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
3937
use default::Default;
@@ -182,23 +180,15 @@ impl<'self, S: Str> StrVector for &'self [S] {
182180
fn concat(&self) -> ~str {
183181
if self.is_empty() { return ~""; }
184182

183+
// `len` calculation may overflow, but push_str will check boundaries
185184
let len = self.iter().map(|s| s.as_slice().len()).sum();
186185

187-
let mut s = with_capacity(len);
186+
let mut result = with_capacity(len);
188187

189-
unsafe {
190-
do s.as_mut_buf |buf, _| {
191-
let mut buf = buf;
192-
for ss in self.iter() {
193-
do ss.as_slice().as_imm_buf |ssbuf, sslen| {
194-
ptr::copy_memory(buf, ssbuf, sslen);
195-
buf = buf.offset(sslen as int);
196-
}
197-
}
198-
}
199-
raw::set_len(&mut s, len);
188+
for s in self.iter() {
189+
result.push_str(s.as_slice())
200190
}
201-
s
191+
result
202192
}
203193

204194
/// Concatenate a vector of strings, placing a given separator between each.
@@ -209,34 +199,21 @@ impl<'self, S: Str> StrVector for &'self [S] {
209199
if sep.is_empty() { return self.concat(); }
210200

211201
// this is wrong without the guarantee that `self` is non-empty
202+
// `len` calculation may overflow, but push_str will check boundaries
212203
let len = sep.len() * (self.len() - 1)
213204
+ self.iter().map(|s| s.as_slice().len()).sum();
214-
let mut s = ~"";
205+
let mut result = with_capacity(len);
215206
let mut first = true;
216207

217-
s.reserve(len);
218-
219-
unsafe {
220-
do s.as_mut_buf |buf, _| {
221-
do sep.as_imm_buf |sepbuf, seplen| {
222-
let mut buf = buf;
223-
for ss in self.iter() {
224-
do ss.as_slice().as_imm_buf |ssbuf, sslen| {
225-
if first {
226-
first = false;
227-
} else {
228-
ptr::copy_memory(buf, sepbuf, seplen);
229-
buf = buf.offset(seplen as int);
230-
}
231-
ptr::copy_memory(buf, ssbuf, sslen);
232-
buf = buf.offset(sslen as int);
233-
}
234-
}
235-
}
208+
for s in self.iter() {
209+
if first {
210+
first = false;
211+
} else {
212+
result.push_str(sep);
236213
}
237-
raw::set_len(&mut s, len);
214+
result.push_str(s.as_slice());
238215
}
239-
s
216+
result
240217
}
241218
}
242219

@@ -959,7 +936,6 @@ static TAG_CONT_U8: u8 = 128u8;
959936

960937
/// Unsafe operations
961938
pub mod raw {
962-
use option::Some;
963939
use cast;
964940
use libc;
965941
use ptr;
@@ -1062,21 +1038,22 @@ pub mod raw {
10621038
}
10631039
}
10641040

1065-
/// Appends a byte to a string. (Not UTF-8 safe).
1041+
/// Appends a byte to a string.
1042+
/// The caller must preserve the valid UTF-8 property.
10661043
#[inline]
10671044
pub unsafe fn push_byte(s: &mut ~str, b: u8) {
1068-
let v: &mut ~[u8] = cast::transmute(s);
1069-
v.push(b);
1045+
as_owned_vec(s).push(b)
10701046
}
10711047

1072-
/// Appends a vector of bytes to a string. (Not UTF-8 safe).
1073-
unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
1074-
let new_len = s.len() + bytes.len();
1075-
s.reserve_at_least(new_len);
1076-
for byte in bytes.iter() { push_byte(&mut *s, *byte); }
1048+
/// Appends a vector of bytes to a string.
1049+
/// The caller must preserve the valid UTF-8 property.
1050+
#[inline]
1051+
pub unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
1052+
vec::bytes::push_bytes(as_owned_vec(s), bytes);
10771053
}
10781054

1079-
/// Removes the last byte from a string and returns it. (Not UTF-8 safe).
1055+
/// Removes the last byte from a string and returns it.
1056+
/// The caller must preserve the valid UTF-8 property.
10801057
pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
10811058
let len = s.len();
10821059
assert!((len > 0u));
@@ -1085,7 +1062,8 @@ pub mod raw {
10851062
return b;
10861063
}
10871064

1088-
/// Removes the first byte from a string and returns it. (Not UTF-8 safe).
1065+
/// Removes the first byte from a string and returns it.
1066+
/// The caller must preserve the valid UTF-8 property.
10891067
pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
10901068
let len = s.len();
10911069
assert!((len > 0u));
@@ -1094,15 +1072,21 @@ pub mod raw {
10941072
return b;
10951073
}
10961074

1075+
/// Access the str in its vector representation.
1076+
/// The caller must preserve the valid UTF-8 property when modifying.
1077+
#[inline]
1078+
pub unsafe fn as_owned_vec<'a>(s: &'a mut ~str) -> &'a mut ~[u8] {
1079+
cast::transmute(s)
1080+
}
1081+
10971082
/// Sets the length of a string
10981083
///
10991084
/// This will explicitly set the size of the string, without actually
11001085
/// modifying its buffers, so it is up to the caller to ensure that
11011086
/// the string is actually the specified size.
11021087
#[inline]
11031088
pub unsafe fn set_len(s: &mut ~str, new_len: uint) {
1104-
let v: &mut ~[u8] = cast::transmute(s);
1105-
vec::raw::set_len(v, new_len)
1089+
vec::raw::set_len(as_owned_vec(s), new_len)
11061090
}
11071091

11081092
/// Sets the length of a string
@@ -2053,22 +2037,11 @@ impl<'self> StrSlice<'self> for &'self str {
20532037
20542038
/// Given a string, make a new string with repeated copies of it.
20552039
fn repeat(&self, nn: uint) -> ~str {
2056-
do self.as_imm_buf |buf, len| {
2057-
let mut ret = with_capacity(nn * len);
2058-
2059-
unsafe {
2060-
do ret.as_mut_buf |rbuf, _len| {
2061-
let mut rbuf = rbuf;
2062-
2063-
do nn.times {
2064-
ptr::copy_memory(rbuf, buf, len);
2065-
rbuf = rbuf.offset(len as int);
2066-
}
2067-
}
2068-
raw::set_len(&mut ret, nn * len);
2069-
}
2070-
ret
2040+
let mut ret = with_capacity(nn * self.len());
2041+
for _ in range(0, nn) {
2042+
ret.push_str(*self);
20712043
}
2044+
ret
20722045
}
20732046
20742047
/// Retrieves the first character from a string slice and returns
@@ -2191,54 +2164,35 @@ impl OwnedStr for ~str {
21912164
/// Appends a string slice to the back of a string, without overallocating
21922165
#[inline]
21932166
fn push_str_no_overallocate(&mut self, rhs: &str) {
2194-
unsafe {
2195-
let llen = self.len();
2196-
let rlen = rhs.len();
2197-
self.reserve(llen + rlen);
2198-
do self.as_imm_buf |lbuf, _llen| {
2199-
do rhs.as_imm_buf |rbuf, _rlen| {
2200-
let dst = ptr::offset(lbuf, llen as int);
2201-
let dst = cast::transmute_mut_unsafe(dst);
2202-
ptr::copy_memory(dst, rbuf, rlen);
2203-
}
2204-
}
2205-
raw::set_len(self, llen + rlen);
2206-
}
2167+
let new_cap = self.len() + rhs.len();
2168+
self.reserve(new_cap);
2169+
self.push_str(rhs);
22072170
}
22082171
22092172
/// Appends a string slice to the back of a string
22102173
#[inline]
22112174
fn push_str(&mut self, rhs: &str) {
22122175
unsafe {
2213-
let llen = self.len();
2214-
let rlen = rhs.len();
2215-
self.reserve_at_least(llen + rlen);
2216-
do self.as_imm_buf |lbuf, _llen| {
2217-
do rhs.as_imm_buf |rbuf, _rlen| {
2218-
let dst = ptr::offset(lbuf, llen as int);
2219-
let dst = cast::transmute_mut_unsafe(dst);
2220-
ptr::copy_memory(dst, rbuf, rlen);
2221-
}
2222-
}
2223-
raw::set_len(self, llen + rlen);
2176+
raw::push_bytes(self, rhs.as_bytes());
22242177
}
22252178
}
22262179
22272180
/// Appends a character to the back of a string
22282181
#[inline]
22292182
fn push_char(&mut self, c: char) {
22302183
let cur_len = self.len();
2231-
self.reserve_at_least(cur_len + 4); // may use up to 4 bytes
2232-
2233-
// Attempt to not use an intermediate buffer by just pushing bytes
2234-
// directly onto this string.
2184+
// may use up to 4 bytes.
22352185
unsafe {
2236-
let v = self.repr();
2237-
let len = c.encode_utf8(cast::transmute(Slice {
2238-
data: ((&(*v).data) as *u8).offset(cur_len as int),
2239-
len: 4,
2240-
}));
2241-
raw::set_len(self, cur_len + len);
2186+
raw::as_owned_vec(self).reserve_additional(4);
2187+
2188+
// Attempt to not use an intermediate buffer by just pushing bytes
2189+
// directly onto this string.
2190+
let used = do self.as_mut_buf |buf, _| {
2191+
do vec::raw::mut_buf_as_slice(buf.offset(cur_len as int), 4) |slc| {
2192+
c.encode_utf8(slc)
2193+
}
2194+
};
2195+
raw::set_len(self, cur_len + used);
22422196
}
22432197
}
22442198
@@ -2298,8 +2252,7 @@ impl OwnedStr for ~str {
22982252
#[inline]
22992253
fn reserve(&mut self, n: uint) {
23002254
unsafe {
2301-
let v: &mut ~[u8] = cast::transmute(self);
2302-
(*v).reserve(n);
2255+
raw::as_owned_vec(self).reserve(n)
23032256
}
23042257
}
23052258
@@ -2321,7 +2274,7 @@ impl OwnedStr for ~str {
23212274
/// * n - The number of bytes to reserve space for
23222275
#[inline]
23232276
fn reserve_at_least(&mut self, n: uint) {
2324-
self.reserve(uint::next_power_of_two(n))
2277+
self.reserve(uint::next_power_of_two_opt(n).unwrap_or(n))
23252278
}
23262279
23272280
/// Returns the number of single-byte characters the string can hold without
@@ -2351,8 +2304,9 @@ impl OwnedStr for ~str {
23512304
23522305
#[inline]
23532306
fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T {
2354-
let v: &mut ~[u8] = unsafe { cast::transmute(self) };
2355-
v.as_mut_buf(f)
2307+
unsafe {
2308+
raw::as_owned_vec(self).as_mut_buf(f)
2309+
}
23562310
}
23572311
}
23582312

0 commit comments

Comments
 (0)