Skip to content

Commit 284df6f

Browse files
committed
Merge pull request #4624 from jreback/astype_iloc2
API/CLN: setitem in Series now consistent with DataFrame
2 parents 9ea0d44 + 3b5d06e commit 284df6f

File tree

8 files changed

+90
-83
lines changed

8 files changed

+90
-83
lines changed

doc/source/release.rst

+2
Original file line number | Diff line number | Diff line change
@@ -265,6 +265,8 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
265265
- Fixed issue where ``DataFrame.apply`` was reraising exceptions incorrectly
266266
(causing the original stack trace to be truncated).
267267
- Fix selection with ``ix/loc`` and non_unique selectors (:issue:`4619`)
268+
- Fix assignment with iloc/loc involving a dtype change in an existing column (:issue:`4312`)
269+
(internally, ``_setitem_with_indexer`` in ``core/indexing`` now uses ``Block.setitem``)
268270

269271
pandas 0.12
270272
===========

pandas/core/common.py

+6-36
Original file line number | Diff line number | Diff line change
@@ -891,42 +891,6 @@ def changeit():
891891

892892
return result, False
893893

894-
895-
def _maybe_upcast_indexer(result, indexer, other, dtype=None):
896-
""" a safe version of setitem that (potentially upcasts the result
897-
return the result and a changed flag
898-
"""
899-
900-
other = _maybe_cast_scalar(result.dtype, other)
901-
original_dtype = result.dtype
902-
903-
def changeit():
904-
# our type is wrong here, need to upcast
905-
r, fill_value = _maybe_upcast(
906-
result, fill_value=other, dtype=dtype, copy=True)
907-
try:
908-
r[indexer] = other
909-
except:
910-
911-
# if we hit this then we still have an incompatible type
912-
r[indexer] = fill_value
913-
914-
# if we have changed to floats, might want to cast back if we can
915-
r = _possibly_downcast_to_dtype(r, original_dtype)
916-
return r, True
917-
918-
new_dtype, fill_value = _maybe_promote(original_dtype, other)
919-
if new_dtype != result.dtype:
920-
return changeit()
921-
922-
try:
923-
result[indexer] = other
924-
except:
925-
return changeit()
926-
927-
return result, False
928-
929-
930894
def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
931895
""" provide explicty type promotion and coercion
932896
@@ -987,6 +951,12 @@ def _possibly_downcast_to_dtype(result, dtype):
987951
dtype = np.dtype(dtype)
988952

989953
try:
954+
955+
# don't allow upcasts here
956+
if dtype.kind == result.dtype.kind:
957+
if result.dtype.itemsize <= dtype.itemsize:
958+
return result
959+
990960
if issubclass(dtype.type, np.floating):
991961
return result.astype(dtype)
992962
elif dtype == np.bool_ or issubclass(dtype.type, np.integer):

pandas/core/groupby.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -1557,7 +1557,9 @@ def transform(self, func, *args, **kwargs):
15571557

15581558
# need to do a safe put here, as the dtype may be different
15591559
# this needs to be an ndarray
1560-
result,_ = com._maybe_upcast_indexer(result, indexer, res)
1560+
result = Series(result)
1561+
result.loc[indexer] = res
1562+
result = result.values
15611563

15621564
# downcast if we can (and need)
15631565
result = _possibly_downcast_to_dtype(result, dtype)

pandas/core/indexing.py

+7-5
Original file line number | Diff line number | Diff line change
@@ -124,11 +124,13 @@ def _setitem_with_indexer(self, indexer, value):
124124
item_labels = self.obj._get_axis(info_axis)
125125

126126
def setter(item, v):
127-
data = self.obj[item]
128-
values = data.values
129-
if np.prod(values.shape):
130-
result, changed = com._maybe_upcast_indexer(values,plane_indexer,v,dtype=getattr(data,'dtype',None))
131-
self.obj[item] = result
127+
s = self.obj[item]
128+
pi = plane_indexer[0] if len(plane_indexer) == 1 else plane_indexer
129+
130+
# set the item, possibly having a dtype change
131+
s = s.copy()
132+
s._data = s._data.setitem(pi,v)
133+
self.obj[item] = s
132134

133135
labels = item_labels[info_idx]
134136

pandas/core/internals.py

+44-30
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ class Block(PandasObject):
3939
is_float = False
4040
is_integer = False
4141
is_complex = False
42+
is_datetime = False
4243
is_bool = False
4344
is_object = False
4445
is_sparse = False
@@ -453,10 +454,19 @@ def _can_hold_element(self, value):
453454
def _try_cast(self, value):
454455
raise NotImplementedError()
455456

456-
def _try_cast_result(self, result):
457+
def _try_cast_result(self, result, dtype=None):
457458
""" try to cast the result to our original type,
458459
we may have roundtripped thru object in the mean-time """
459-
return result
460+
if dtype is None:
461+
dtype = self.dtype
462+
463+
if self.is_integer or self.is_bool or self.is_datetime:
464+
pass
465+
elif self.is_float and result.dtype == self.dtype:
466+
return result
467+
468+
# may need to change the dtype here
469+
return _possibly_downcast_to_dtype(result, dtype)
460470

461471
def _try_coerce_args(self, values, other):
462472
""" provide coercion to our input arguments """
@@ -513,27 +523,29 @@ def setitem(self, indexer, value):
513523
""" set the value inplace; return a new block (of a possibly different dtype)
514524
indexer is a direct slice/positional indexer; value must be a compatible shape """
515525

516-
values = self.values
517-
if self.ndim == 2:
518-
values = values.T
526+
# coerce args
527+
values, value = self._try_coerce_args(self.values, value)
528+
arr_value = np.array(value)
519529

520-
# 2-d (DataFrame) are represented as a transposed array
521-
if self._can_hold_element(value):
522-
try:
523-
values[indexer] = value
524-
return [ self ]
525-
except (IndexError):
526-
return [ self ]
527-
except:
528-
pass
530+
# cast the values to a type that can hold nan (if necessary)
531+
if not self._can_hold_element(value):
532+
dtype, _ = com._maybe_promote(arr_value.dtype)
533+
values = values.astype(dtype)
529534

530-
# create an indexing mask, the putmask which potentially changes the dtype
531-
indices = np.arange(np.prod(values.shape)).reshape(values.shape)
532-
mask = indices[indexer] == indices
533-
if self.ndim == 2:
534-
mask = mask.T
535+
try:
536+
# set and return a block
537+
transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x)
538+
values = transf(values)
539+
values[indexer] = value
540+
541+
# coerce and try to infer the dtypes of the result
542+
values = self._try_coerce_result(values)
543+
values = self._try_cast_result(values, 'infer')
544+
return [make_block(transf(values), self.items, self.ref_items, ndim=self.ndim, fastpath=True)]
545+
except:
546+
pass
535547

536-
return self.putmask(mask, value, inplace=True)
548+
return [ self ]
537549

538550
def putmask(self, mask, new, inplace=False):
539551
""" putmask the data to the block; it is possible that we may create a new dtype of block
@@ -585,7 +597,10 @@ def create_block(v, m, n, item, reshape=True):
585597
if nv is None:
586598
dtype, _ = com._maybe_promote(n.dtype)
587599
nv = v.astype(dtype)
588-
np.putmask(nv, m, n)
600+
try:
601+
nv[m] = n
602+
except:
603+
np.putmask(nv, m, n)
589604

590605
if reshape:
591606
nv = _block_shape(nv)
@@ -842,10 +857,6 @@ class NumericBlock(Block):
842857
is_numeric = True
843858
_can_hold_na = True
844859

845-
def _try_cast_result(self, result):
846-
return _possibly_downcast_to_dtype(result, self.dtype)
847-
848-
849860
class FloatBlock(NumericBlock):
850861
is_float = True
851862
_downcast_dtype = 'int64'
@@ -1104,6 +1115,7 @@ def re_replacer(s):
11041115

11051116

11061117
class DatetimeBlock(Block):
1118+
is_datetime = True
11071119
_can_hold_na = True
11081120

11091121
def __init__(self, values, items, ref_items, fastpath=False, placement=None, **kwargs):
@@ -1119,8 +1131,8 @@ def _gi(self, arg):
11191131
def _can_hold_element(self, element):
11201132
if is_list_like(element):
11211133
element = np.array(element)
1122-
return element.dtype == _NS_DTYPE
1123-
return com.is_integer(element) or isinstance(element, datetime)
1134+
return element.dtype == _NS_DTYPE or element.dtype == np.int64
1135+
return com.is_integer(element) or isinstance(element, datetime) or isnull(element)
11241136

11251137
def _try_cast(self, element):
11261138
try:
@@ -1133,10 +1145,10 @@ def _try_coerce_args(self, values, other):
11331145
we are going to compare vs i8, so coerce to integer
11341146
values is always ndarray-like, other may not be """
11351147
values = values.view('i8')
1136-
if isinstance(other, datetime):
1137-
other = lib.Timestamp(other).asm8.view('i8')
1138-
elif isnull(other):
1148+
if isnull(other) or (np.isscalar(other) and other == tslib.iNaT):
11391149
other = tslib.iNaT
1150+
elif isinstance(other, datetime):
1151+
other = lib.Timestamp(other).asm8.view('i8')
11401152
else:
11411153
other = other.view('i8')
11421154

@@ -1438,6 +1450,8 @@ def split_block_at(self, item):
14381450
return []
14391451
return super(SparseBlock, self).split_block_at(self, item)
14401452

1453+
def _try_cast_result(self, result, dtype=None):
1454+
return result
14411455

14421456
def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, fastpath=False, placement=None):
14431457

pandas/tests/test_common.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -127,14 +127,14 @@ def test_nan_to_nat_conversions():
127127
result = df.loc[4,'B'].value
128128
assert(result == iNaT)
129129

130-
values = df['B'].values
131-
result, changed = com._maybe_upcast_indexer(values,tuple([slice(8,9)]),np.nan)
132-
assert(isnull(result[8]))
130+
s = df['B'].copy()
131+
s._data = s._data.setitem(tuple([slice(8,9)]),np.nan)
132+
assert(isnull(s[8]))
133133

134134
# numpy < 1.7.0 is wrong
135135
from distutils.version import LooseVersion
136136
if LooseVersion(np.__version__) >= '1.7.0':
137-
assert(result[8] == np.datetime64('NaT'))
137+
assert(s[8].value == np.datetime64('NaT').astype(np.int64))
138138

139139
def test_any_none():
140140
assert(com._any_none(1, 2, 3, None))

pandas/tests/test_frame.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -7669,10 +7669,9 @@ def _check_get(df, cond, check_dtypes = True):
76697669
# upcasting case (GH # 2794)
76707670
df = DataFrame(dict([ (c,Series([1]*3,dtype=c)) for c in ['int64','int32','float32','float64'] ]))
76717671
df.ix[1,:] = 0
7672-
76737672
result = df.where(df>=0).get_dtype_counts()
76747673

7675-
#### when we don't preserver boolean casts ####
7674+
#### when we don't preserve boolean casts ####
76767675
#expected = Series({ 'float32' : 1, 'float64' : 3 })
76777676

76787677
expected = Series({ 'float32' : 1, 'float64' : 1, 'int32' : 1, 'int64' : 1 })

pandas/tests/test_indexing.py

+23-5
Original file line number | Diff line number | Diff line change
@@ -942,6 +942,7 @@ def test_multi_assign(self):
942942
# frame on rhs
943943
df2.ix[mask, cols]= dft.ix[mask, cols]
944944
assert_frame_equal(df2,expected)
945+
945946
df2.ix[mask, cols]= dft.ix[mask, cols]
946947
assert_frame_equal(df2,expected)
947948

@@ -964,11 +965,12 @@ def test_ix_assign_column_mixed(self):
964965
# GH 3668, mixed frame with series value
965966
df = DataFrame({'x':lrange(10), 'y':lrange(10,20),'z' : 'bar'})
966967
expected = df.copy()
967-
expected.ix[0, 'y'] = 1000
968-
expected.ix[2, 'y'] = 1200
969-
expected.ix[4, 'y'] = 1400
970-
expected.ix[6, 'y'] = 1600
971-
expected.ix[8, 'y'] = 1800
968+
969+
for i in range(5):
970+
indexer = i*2
971+
v = 1000 + i*200
972+
expected.ix[indexer, 'y'] = v
973+
self.assert_(expected.ix[indexer, 'y'] == v)
972974

973975
df.ix[df.x % 2 == 0, 'y'] = df.ix[df.x % 2 == 0, 'y'] * 100
974976
assert_frame_equal(df,expected)
@@ -1197,6 +1199,22 @@ def gen_expected(df,mask):
11971199
expected = gen_expected(df,mask)
11981200
assert_frame_equal(result,expected)
11991201

1202+
def test_astype_assignment_with_iloc(self):
1203+
1204+
# GH4312
1205+
df_orig = DataFrame([['1','2','3','.4',5,6.,'foo']],columns=list('ABCDEFG'))
1206+
1207+
df = df_orig.copy()
1208+
df.iloc[:,0:3] = df.iloc[:,0:3].astype(int)
1209+
result = df.get_dtype_counts().sort_index()
1210+
expected = Series({ 'int64' : 4, 'float64' : 1, 'object' : 2 }).sort_index()
1211+
assert_series_equal(result,expected)
1212+
1213+
df = df_orig.copy()
1214+
df.iloc[:,0:3] = df.iloc[:,0:3].convert_objects(convert_numeric=True)
1215+
result = df.get_dtype_counts().sort_index()
1216+
expected = Series({ 'int64' : 4, 'float64' : 1, 'object' : 2 }).sort_index()
1217+
12001218
if __name__ == '__main__':
12011219
import nose
12021220
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)