diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 401a7746953cb..d1a1c5f6b2945 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -233,7 +233,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): return self._combine_series(casted, na_op, fill_value, axis, level) elif other.ndim == 2: - casted = DataFrame(other, index=self.index, + casted = self._constructor(other, index=self.index, columns=self.columns) return self._combine_frame(casted, na_op, fill_value, level) else: # pragma: no cover @@ -297,7 +297,7 @@ def f(self, other, axis=default_axis, level=None): return self._combine_series(casted, na_op, None, axis, level) elif other.ndim == 2: - casted = DataFrame(other, index=self.index, + casted = self._constructor(other, index=self.index, columns=self.columns) return self._flex_compare_frame(casted, na_op, str_rep, level) @@ -1771,7 +1771,7 @@ def as_blocks(self, columns=None): bd = dict() for b in self._data.blocks: b = b.reindex_items_from(columns or b.items) - bd[str(b.dtype)] = DataFrame(BlockManager([ b ], [ b.items, self.index ])) + bd[str(b.dtype)] = self._constructor(BlockManager([ b ], [ b.items, self.index ])) return bd blocks = property(fget=as_blocks) @@ -1841,12 +1841,12 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover (vals, idx, cols), object_state = state index = _unpickle_array(idx) - dm = DataFrame(vals, index=index, columns=_unpickle_array(cols), + dm = self._constructor(vals, index=index, columns=_unpickle_array(cols), copy=False) if object_state is not None: ovals, _, ocols = object_state - objects = DataFrame(ovals, index=index, + objects = self._constructor(ovals, index=index, columns=_unpickle_array(ocols), copy=False) @@ -2041,7 +2041,7 @@ def _getitem_multilevel(self, key): result.columns = result_columns else: new_values = self.values[:, loc] - result = DataFrame(new_values, index=self.index, + result = self._constructor(new_values, index=self.index, columns=result_columns) if len(result.columns) == 1: top = result.columns[0] @@ -2558,7 +2558,7 @@ def _align_series(self, other, join='outer', axis=None, level=None, if copy and fdata is self._data: fdata = fdata.copy() - left_result = DataFrame(fdata) + left_result = self._constructor(fdata) right_result = other if ridx is None else other.reindex(join_index) fill_na = notnull(fill_value) or (method is not None) @@ -2737,7 +2737,7 @@ def _reindex_with_indexers(self, index, row_indexer, columns, col_indexer, if copy and new_data is self._data: new_data = new_data.copy() - return DataFrame(new_data) + return self._constructor(new_data) def reindex_like(self, other, method=None, copy=True, limit=None, fill_value=NA): @@ -2985,7 +2985,7 @@ def take(self, indices, axis=0, convert=True): if self._is_mixed_type: if axis == 0: new_data = self._data.take(indices, axis=1, verify=False) - return DataFrame(new_data) + return self._constructor(new_data) else: new_columns = self.columns.take(indices) return self.reindex(columns=new_columns) @@ -2999,7 +2999,7 @@ def take(self, indices, axis=0, convert=True): else: new_columns = self.columns.take(indices) new_index = self.index - return DataFrame(new_values, index=new_index, + return self._constructor(new_values, index=new_index, columns=new_columns) #---------------------------------------------------------------------- @@ -4075,7 +4075,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, raise NotImplementedError if not isinstance(other, DataFrame): - other = DataFrame(other) + other = self._constructor(other) other = other.reindex_like(self) @@ -4425,7 +4425,7 @@ def _apply_raw(self, func, axis): # TODO: mixed type case if result.ndim == 2: - return DataFrame(result, index=self.index, + return self._constructor(result, index=self.index, columns=self.columns) else: return Series(result, index=self._get_agg_axis(axis)) @@ -4592,10 +4592,10 @@ def append(self, other, ignore_index=False, verify_integrity=False): index = None if other.name is None else [other.name] other = other.reindex(self.columns, copy=False) - other = DataFrame(other.values.reshape((1, len(other))), + other = self._constructor(other.values.reshape((1, len(other))), index=index, columns=self.columns) elif isinstance(other, list) and not isinstance(other[0], DataFrame): - other = DataFrame(other) + other = self._constructor(other) if (self.columns.get_indexer(other.columns) >= 0).all(): other = other.ix[:, self.columns] @@ -4660,7 +4660,7 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', if isinstance(other, Series): if other.name is None: raise AssertionError('Other Series must have a name') - other = DataFrame({other.name: other}) + other = self._constructor({other.name: other}) if isinstance(other, DataFrame): return merge(self, other, left_on=on, how=how, @@ -4862,7 +4862,7 @@ def describe(self, percentile_width=50): numdata = self._get_numeric_data() if len(numdata.columns) == 0: - return DataFrame(dict((k, v.describe()) + return self._constructor(dict((k, v.describe()) for k, v in self.iteritems()), columns=self.columns) @@ -4954,7 +4954,7 @@ def _count_level(self, level, axis=0, numeric_only=False): labels = com._ensure_int64(frame.index.labels[level]) counts = lib.count_level_2d(mask, labels, len(level_index)) - result = DataFrame(counts, index=level_index, + result = self._constructor(counts, index=level_index, columns=frame.columns) if axis == 1: diff --git a/pandas/core/series.py b/pandas/core/series.py index 15a425fb3fd73..bdc69fbb0af85 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -161,7 +161,7 @@ def wrap_results(x): if self.index.equals(other.index): name = _maybe_match_name(self, other) - return Series(wrap_results(na_op(lvalues, rvalues)), + return self._constructor(wrap_results(na_op(lvalues, rvalues)), index=self.index, name=name, dtype=dtype) join_idx, lidx, ridx = self.index.join(other.index, how='outer', @@ -176,14 +176,14 @@ def wrap_results(x): arr = na_op(lvalues, rvalues) name = _maybe_match_name(self, other) - return Series(wrap_results(arr), index=join_idx, name=name,dtype=dtype) + return self._constructor(wrap_results(arr), index=join_idx, name=name,dtype=dtype) elif isinstance(other, DataFrame): return NotImplemented else: # scalars if hasattr(lvalues,'values'): lvalues = lvalues.values - return Series(wrap_results(na_op(lvalues, rvalues)), + return self._constructor(wrap_results(na_op(lvalues, rvalues)), index=self.index, name=self.name, dtype=dtype) return wrapper @@ -335,10 +335,10 @@ def f(self, other, level=None, fill_value=None): elif isinstance(other, (pa.Array, list, tuple)): if len(other) != len(self): raise ValueError('Lengths must be equal') - return self._binop(Series(other, self.index), op, + return self._binop(self._constructor(other, self.index), op, level=level, fill_value=fill_value) else: - return Series(op(self.values, other), self.index, + return self._constructor(op(self.values, other), self.index, name=self.name) f.__name__ = name @@ -488,7 +488,7 @@ def __new__(cls, data=None, index=None, dtype=None, name=None, index = DatetimeIndex(index) subarr = subarr.view(TimeSeries) else: - subarr = subarr.view(Series) + subarr = subarr.view(cls) subarr.index = index subarr.name = name @@ -506,7 +506,7 @@ def from_array(cls, arr, index=None, name=None, copy=False): if copy: arr = arr.copy() - klass = Series + klass = cls if index.is_all_dates: if not isinstance(index, (DatetimeIndex, PeriodIndex)): index = DatetimeIndex(index) @@ -522,6 +522,10 @@ def __init__(self, data=None, index=None, dtype=None, name=None, copy=False): pass + @property + def _constructor(self): + return self.__class__ + @property def _can_hold_na(self): return not is_integer_dtype(self.dtype) @@ -704,17 +708,17 @@ def _get_values_tuple(self, key): # If key is contained, would have returned by now indexer, new_index = self.index.get_loc_level(key) - return Series(self.values[indexer], index=new_index, name=self.name) + return self._constructor(self.values[indexer], index=new_index, name=self.name) def _get_values(self, indexer): try: - return Series(self.values[indexer], index=self.index[indexer], + return self._constructor(self.values[indexer], index=self.index[indexer], name=self.name) except Exception: return self.values[indexer] def get_dtype_counts(self): - return Series({ self.dtype.name : 1 }) + return self._constructor({ self.dtype.name : 1 }) def where(self, cond, other=nan, inplace=False): """ @@ -955,7 +959,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, copy=True): converted : Series """ if self.dtype == np.object_: - return Series(com._possibly_convert_objects(self.values, + return self._constructor(com._possibly_convert_objects(self.values, convert_dates=convert_dates, convert_numeric=convert_numeric), index=self.index, name=self.name) return self.copy() if copy else self @@ -966,7 +970,7 @@ def repeat(self, reps): """ new_index = self.index.repeat(reps) new_values = self.values.repeat(reps) - return Series(new_values, index=new_index, name=self.name) + return self._constructor(new_values, index=new_index, name=self.name) def reshape(self, newshape, order='C'): """ @@ -1048,7 +1052,7 @@ def set_value(self, label, value): new_index = self.index.insert(len(self), label) new_values = np.concatenate([self.values, [value]]) - return Series(new_values, index=new_index, name=self.name) + return self._constructor(new_values, index=new_index, name=self.name) def reset_index(self, level=None, drop=False, name=None, inplace=False): """ @@ -1084,7 +1088,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): # set name if it was passed, otherwise, keep the previous name self.name = name or self.name else: - return Series(self.values.copy(), index=new_index, + return self._constructor(self.values.copy(), index=new_index, name=self.name) elif inplace: raise TypeError('Cannot reset_index inplace on a Series ' @@ -1116,7 +1120,7 @@ def __unicode__(self): name=True, dtype=True) else: - result = u'Series([], dtype: %s)' % self.dtype + result = u'%s([], dtype: %s)' % (type(self).__name__, self.dtype) if not ( type(result) == unicode): raise AssertionError() @@ -1253,11 +1257,11 @@ def iteritems(self): # inversion def __neg__(self): arr = operator.neg(self.values) - return Series(arr, self.index, name=self.name) + return self._constructor(arr, self.index, name=self.name) def __invert__(self): arr = operator.inv(self.values) - return Series(arr, self.index, name=self.name) + return self._constructor(arr, self.index, name=self.name) # binary logic __or__ = _bool_method(operator.or_, '__or__') @@ -1313,7 +1317,7 @@ def copy(self, order='C'): ------- cp : Series """ - return Series(self.values.copy(order), index=self.index, + return self._constructor(self.values.copy(order), index=self.index, name=self.name) def tolist(self): @@ -1390,14 +1394,14 @@ def count(self, level=None): level_index = self.index.levels[level] if len(self) == 0: - return Series(0, index=level_index) + return self._constructor(0, index=level_index) # call cython function max_bin = len(level_index) labels = com._ensure_int64(self.index.labels[level]) counts = lib.count_level_1d(mask.view(pa.uint8), labels, max_bin) - return Series(counts, index=level_index) + return self._constructor(counts, index=level_index) return notnull(self.values).sum() @@ -1473,7 +1477,7 @@ def duplicated(self, take_last=False): """ keys = com._ensure_object(self.values) duplicated = lib.duplicated(keys, take_last=take_last) - return Series(duplicated, index=self.index, name=self.name) + return self._constructor(duplicated, index=self.index, name=self.name) sum = _make_stat_func(nanops.nansum, 'sum', 'sum') mean = _make_stat_func(nanops.nanmean, 'mean', 'mean') @@ -1661,7 +1665,7 @@ def cumsum(self, axis=0, dtype=None, out=None, skipna=True): if do_mask: np.putmask(result, mask, pa.NA) - return Series(result, index=self.index) + return self._constructor(result, index=self.index) def cumprod(self, axis=0, dtype=None, out=None, skipna=True): """ @@ -1690,7 +1694,7 @@ def cumprod(self, axis=0, dtype=None, out=None, skipna=True): if do_mask: np.putmask(result, mask, pa.NA) - return Series(result, index=self.index) + return self._constructor(result, index=self.index) def cummax(self, axis=0, dtype=None, out=None, skipna=True): """ @@ -1719,7 +1723,7 @@ def cummax(self, axis=0, dtype=None, out=None, skipna=True): if do_mask: np.putmask(result, mask, pa.NA) - return Series(result, index=self.index) + return self._constructor(result, index=self.index) def cummin(self, axis=0, dtype=None, out=None, skipna=True): """ @@ -1748,7 +1752,7 @@ def cummin(self, axis=0, dtype=None, out=None, skipna=True): if do_mask: np.putmask(result, mask, pa.NA) - return Series(result, index=self.index) + return self._constructor(result, index=self.index) @Appender(pa.Array.round.__doc__) def round(self, decimals=0, out=None): @@ -1757,7 +1761,7 @@ def round(self, decimals=0, out=None): """ result = self.values.round(decimals, out=out) if out is None: - result = Series(result, index=self.index, name=self.name) + result = self._constructor(result, index=self.index, name=self.name) return result @@ -1846,7 +1850,7 @@ def pretty_name(x): lb), self.median(), self.quantile(ub), self.max()] - return Series(data, index=names) + return self._constructor(data, index=names) def corr(self, other, method='pearson', min_periods=None): @@ -1910,7 +1914,7 @@ def diff(self, periods=1): diffed : Series """ result = com.diff(self.values, periods) - return Series(result, self.index, name=self.name) + return self._constructor(result, self.index, name=self.name) def autocorr(self): """ @@ -2091,7 +2095,7 @@ def _binop(self, other, func, level=None, fill_value=None): result = func(this_vals, other_vals) name = _maybe_match_name(self, other) - return Series(result, index=new_index, name=name) + return self._constructor(result, index=new_index, name=name) add = _flex_method(operator.add, 'add') sub = _flex_method(operator.sub, 'subtract') @@ -2131,7 +2135,7 @@ def combine(self, other, func, fill_value=nan): new_index = self.index new_values = func(self.values, other) new_name = self.name - return Series(new_values, index=new_index, name=new_name) + return self._constructor(new_values, index=new_index, name=new_name) def combine_first(self, other): """ @@ -2151,7 +2155,7 @@ def combine_first(self, other): other = other.reindex(new_index, copy=False) name = _maybe_match_name(self, other) rs_vals = com._where_compat(isnull(this), other, this) - return Series(rs_vals, index=new_index, name=name) + return self._constructor(rs_vals, index=new_index, name=name) def update(self, other): """ @@ -2233,7 +2237,7 @@ def sort_index(self, ascending=True): ascending=ascending) new_values = self.values.take(indexer) - return Series(new_values, new_labels, name=self.name) + return self._constructor(new_values, new_labels, name=self.name) def argsort(self, axis=0, kind='quicksort', order=None): """ @@ -2289,7 +2293,7 @@ def rank(self, method='average', na_option='keep', ascending=True): from pandas.core.algorithms import rank ranks = rank(self.values, method=method, na_option=na_option, ascending=ascending) - return Series(ranks, index=self.index, name=self.name) + return self._constructor(ranks, index=self.index, name=self.name) def order(self, na_last=True, ascending=True, kind='mergesort'): """ @@ -2341,7 +2345,7 @@ def _try_kind_sort(arr): sortedIdx[n:] = idx[good][argsorted] sortedIdx[:n] = idx[bad] - return Series(arr[sortedIdx], index=self.index[sortedIdx], + return self._constructor(arr[sortedIdx], index=self.index[sortedIdx], name=self.name) def sortlevel(self, level=0, ascending=True): @@ -2364,7 +2368,7 @@ def sortlevel(self, level=0, ascending=True): new_index, indexer = self.index.sortlevel(level, ascending=ascending) new_values = self.values.take(indexer) - return Series(new_values, index=new_index, name=self.name) + return self._constructor(new_values, index=new_index, name=self.name) def swaplevel(self, i, j, copy=True): """ @@ -2380,7 +2384,7 @@ def swaplevel(self, i, j, copy=True): swapped : Series """ new_index = self.index.swaplevel(i, j) - return Series(self.values, index=new_index, copy=copy, name=self.name) + return self._constructor(self.values, index=new_index, copy=copy, name=self.name) def reorder_levels(self, order): """ @@ -2488,14 +2492,14 @@ def map_f(values, f): if isinstance(arg, (dict, Series)): if isinstance(arg, dict): - arg = Series(arg) + arg = self._constructor(arg) indexer = arg.index.get_indexer(values) new_values = com.take_1d(arg.values, indexer) - return Series(new_values, index=self.index, name=self.name) + return self._constructor(new_values, index=self.index, name=self.name) else: mapped = map_f(values, arg) - return Series(mapped, index=self.index, name=self.name) + return self._constructor(mapped, index=self.index, name=self.name) def apply(self, func, convert_dtype=True, args=(), **kwds): """ @@ -2519,7 +2523,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): y : Series or DataFrame if func returns a Series """ if len(self) == 0: - return Series() + return self._constructor() if kwds or args and not isinstance(func, np.ufunc): f = lambda x: func(x, *args, **kwds) @@ -2538,7 +2542,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): from pandas.core.frame import DataFrame return DataFrame(mapped.tolist(), index=self.index) else: - return Series(mapped, index=self.index, name=self.name) + return self._constructor(mapped, index=self.index, name=self.name) def align(self, other, join='outer', level=None, copy=True, fill_value=None, method=None, limit=None): @@ -2636,7 +2640,7 @@ def reindex(self, index=None, method=None, level=None, fill_value=pa.NA, return self if len(self.index) == 0: - return Series(nan, index=index, name=self.name) + return self._constructor(nan, index=index, name=self.name) new_index, indexer = self.index.reindex(index, method=method, level=level, limit=limit, @@ -2647,7 +2651,7 @@ def reindex(self, index=None, method=None, level=None, fill_value=pa.NA, def _reindex_with_indexers(self, index, indexer, copy, fill_value): new_values = com.take_1d(self.values, indexer, fill_value=fill_value) - return Series(new_values, index=index, name=self.name) + return self._constructor(new_values, index=index, name=self.name) def reindex_axis(self, labels, axis=0, **kwargs): """ for compatibility with higher dims """ @@ -2696,7 +2700,7 @@ def take(self, indices, axis=0, convert=True): indices = com._ensure_platform_int(indices) new_index = self.index.take(indices) new_values = self.values.take(indices) - return Series(new_values, index=new_index, name=self.name) + return self._constructor(new_values, index=new_index, name=self.name) truncate = generic.truncate @@ -2756,7 +2760,7 @@ def fillna(self, value=None, method=None, inplace=False, if inplace: result = self else: - result = Series(values, index=self.index, name=self.name) + result = self._constructor(values, index=self.index, name=self.name) if not inplace: return result @@ -2859,7 +2863,7 @@ def _rep_dict(rs, to_rep): # replace {[src] -> dest} fill_f(result.values, limit=limit, mask=mask) if not inplace: - result = Series(result.values, index=self.index, + result = self._constructor(result.values, index=self.index, name=self.name) else: raise ValueError('Unrecognized to_replace type %s' % @@ -3066,17 +3070,17 @@ def _get_values(): new_values[:periods] = self.values[-periods:] new_values[periods:] = fill_value - return Series(new_values, index=self.index, name=self.name) + return self._constructor(new_values, index=self.index, name=self.name) elif isinstance(self.index, PeriodIndex): orig_offset = datetools.to_offset(self.index.freq) if orig_offset == offset: - return Series(_get_values(), self.index.shift(periods), + return self._constructor(_get_values(), self.index.shift(periods), name=self.name) msg = ('Given freq %s does not match PeriodIndex freq %s' % (offset.rule_code, orig_offset.rule_code)) raise ValueError(msg) else: - return Series(_get_values(), + return self._constructor(_get_values(), index=self.index.shift(periods, offset), name=self.name) @@ -3124,7 +3128,7 @@ def asof(self, where): locs = self.index.asof_locs(where, notnull(values)) new_values = com.take_1d(values, locs) - return Series(new_values, index=where, name=self.name) + return self._constructor(new_values, index=where, name=self.name) def interpolate(self, method='linear'): """ @@ -3175,7 +3179,7 @@ def interpolate(self, method='linear'): result[firstIndex:][invalid] = np.interp( inds[invalid], inds[valid], values[firstIndex:][valid]) - return Series(result, index=self.index, name=self.name) + return self._constructor(result, index=self.index, name=self.name) def rename(self, mapper, inplace=False): """ @@ -3220,7 +3224,7 @@ def rename(self, mapper, inplace=False): @property def weekday(self): - return Series([d.weekday() for d in self.index], index=self.index) + return self._constructor([d.weekday() for d in self.index], index=self.index) def tz_convert(self, tz, copy=True): """ @@ -3242,7 +3246,7 @@ def tz_convert(self, tz, copy=True): if copy: new_values = new_values.copy() - return Series(new_values, index=new_index, name=self.name) + return self._constructor(new_values, index=new_index, name=self.name) def tz_localize(self, tz, copy=True): """ @@ -3277,7 +3281,7 @@ def tz_localize(self, tz, copy=True): if copy: new_values = new_values.copy() - return Series(new_values, index=new_index, name=self.name) + return self._constructor(new_values, index=new_index, name=self.name) @cache_readonly def str(self): @@ -3526,7 +3530,7 @@ def to_timestamp(self, freq=None, how='start', copy=True): new_values = new_values.copy() new_index = self.index.to_timestamp(freq=freq, how=how) - return Series(new_values, index=new_index, name=self.name) + return self._constructor(new_values, index=new_index, name=self.name) def to_period(self, freq=None, copy=True): """ @@ -3548,4 +3552,4 @@ def to_period(self, freq=None, copy=True): if freq is None: freq = self.index.freqstr or self.index.inferred_freq new_index = self.index.to_period(freq=freq) - return Series(new_values, index=new_index, name=self.name) + return self._constructor(new_values, index=new_index, name=self.name)