diff --git a/RELEASE.rst b/RELEASE.rst index 2ba2296064612..4f420f45a5c91 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -43,6 +43,11 @@ pandas 0.10.0 `describe_option`, and `reset_option`. Deprecate `set_printoptions` and `reset_printoptions` (#2393) +**Experimental Features** + - Add support for Panel4D, a named 4 Dimensional structure + - Add support for ndpanel factory functions, to create custom, domain-specific + N Dimensional containers + **API Changes** - inf/-inf are no longer considered as NA by isnull/notnull. To be clear, this @@ -85,6 +90,7 @@ pandas 0.10.0 - Add `line_terminator` option to DataFrame.to_csv (#2383) - added implementation of str(x)/unicode(x)/bytes(x) to major pandas data structures, which should do the right thing on both py2.x and py3.x. (#2224) + - Added boolean comparison operators to Panel **Bug fixes** diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 3c3c67092c8f1..da3a7338d4fb2 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -798,3 +798,122 @@ method: major_axis=date_range('1/1/2000', periods=5), minor_axis=['a', 'b', 'c', 'd']) panel.to_frame() + +Panel4D (Experimental) +---------------------- + +``Panel4D`` is a 4-Dimensional named container very much like a ``Panel``, but +having 4 named dimensions. It is intended as a test bed for more N-Dimensional named +containers. + + - **labels**: axis 0, each item corresponds to a Panel contained inside + - **items**: axis 1, each item corresponds to a DataFrame contained inside + - **major_axis**: axis 2, it is the **index** (rows) of each of the + DataFrames + - **minor_axis**: axis 3, it is the **columns** of each of the DataFrames + + +``Panel4D`` is a sub-class of ``Panel``, so most methods that work on Panels are +applicable to Panel4D. 
The following methods are disabled: + + - ``join , to_frame , to_excel , to_sparse , groupby`` + +Construction of Panel4D works in a very similar manner to a ``Panel`` + +From 4D ndarray with optional axis labels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. ipython:: python + + p4d = Panel4D(randn(2, 2, 5, 4), + labels=['Label1','Label2'], + items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) + p4d + + +From dict of Panel objects +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. ipython:: python + + data = { 'Label1' : Panel({ 'Item1' : DataFrame(randn(4, 3)) }), + 'Label2' : Panel({ 'Item2' : DataFrame(randn(4, 2)) }) } + Panel4D(data) + +Note that the values in the dict need only be **convertible to Panels**. +Thus, they can be any of the other valid inputs to Panel as per above. + +Slicing +~~~~~~~ + +Slicing works in a similar manner to a Panel. ``[]`` slices the first dimension. +``.ix`` allows you to slice arbitrarily and get back lower dimensional objects + +.. ipython:: python + + p4d['Label1'] + +4D -> Panel + +.. ipython:: python + + p4d.ix[:,:,:,'A'] + +4D -> DataFrame + +.. ipython:: python + + p4d.ix[:,:,0,'A'] + +4D -> Series + +.. ipython:: python + + p4d.ix[:,0,0,'A'] + +Transposing +~~~~~~~~~~~ + +A Panel4D can be rearranged using its ``transpose`` method (which does not make a +copy by default unless the data are heterogeneous): + +.. ipython:: python + + p4d.transpose(3, 2, 1, 0) + +PanelND (Experimental) +---------------------- + +PanelND is a module with a set of factory functions to enable a user to construct N-dimensional named +containers like Panel4D, with a custom set of axis labels. Thus a domain-specific container can easily be +created. + +The following creates a Panel5D. A new panel type object must be sliceable into a lower dimensional object. +Here we slice to a Panel4D. + +.. 
ipython:: python + + from pandas.core import panelnd + Panel5D = panelnd.create_nd_panel_factory( + klass_name = 'Panel5D', + axis_orders = [ 'cool', 'labels','items','major_axis','minor_axis'], + axis_slices = { 'labels' : 'labels', 'items' : 'items', + 'major_axis' : 'major_axis', 'minor_axis' : 'minor_axis' }, + slicer = Panel4D, + axis_aliases = { 'major' : 'major_axis', 'minor' : 'minor_axis' }, + stat_axis = 2) + + p5d = Panel5D(dict(C1 = p4d)) + p5d + + # print a slice of our 5D + p5d.ix['C1',:,:,0:3,:] + + # transpose it + p5d.transpose(1,2,3,4,0) + + # look at the shape & dim + p5d.shape + p5d.ndim diff --git a/doc/source/v0.10.0.txt b/doc/source/v0.10.0.txt index 86741304ef479..b058dd5683db5 100644 --- a/doc/source/v0.10.0.txt +++ b/doc/source/v0.10.0.txt @@ -110,6 +110,23 @@ Updated PyTables Support import os os.remove('store.h5') +N Dimensional Panels (Experimental) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Adding experimental support for Panel4D and factory functions to create n-dimensional named panels. +:ref:`Docs ` for NDim. Here is a taste of what to expect. + + .. 
ipython:: python + + p4d = Panel4D(randn(2, 2, 5, 4), + labels=['Label1','Label2'], + items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) + p4d + + + API changes ~~~~~~~~~~~ diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 41aaaa2d15e9b..f3c8289f31dff 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -13,6 +13,7 @@ _get_combined_index) from pandas.core.indexing import _NDFrameIndexer, _maybe_droplevels from pandas.core.internals import BlockManager, make_block, form_blocks +from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.util import py3compat @@ -104,7 +105,7 @@ def f(self, other): def _panel_arith_method(op, name): @Substitution(op) - def f(self, other, axis='items'): + def f(self, other, axis = 0): """ Wrapper method for %s @@ -123,6 +124,44 @@ def f(self, other, axis='items'): f.__name__ = name return f +def _comp_method(func, name): + + def na_op(x, y): + try: + result = func(x, y) + except TypeError: + xrav = x.ravel() + result = np.empty(x.size, dtype=x.dtype) + if isinstance(y, np.ndarray): + yrav = y.ravel() + mask = notnull(xrav) & notnull(yrav) + result[mask] = func(np.array(list(xrav[mask])), + np.array(list(yrav[mask]))) + else: + mask = notnull(xrav) + result[mask] = func(np.array(list(xrav[mask])), y) + + if func == operator.ne: # pragma: no cover + np.putmask(result, -mask, True) + else: + np.putmask(result, -mask, False) + result = result.reshape(x.shape) + + return result + + @Appender('Wrapper for comparison method %s' % name) + def f(self, other): + if isinstance(other, self._constructor): + return self._compare_constructor(other, func) + elif isinstance(other, (self._constructor_sliced, DataFrame, Series)): + raise Exception("input needs alignment for this object [%s]" % self._constructor) + else: + return self._combine_const(other, na_op) + + + f.__name__ = name + + return f 
_agg_doc = """ Return %(desc)s over requested axis @@ -280,7 +319,6 @@ def _init_dict(self, data, axes, dtype=None): # shallow copy arrays = [] - reshaped_data = data.copy() haxis_shape = [ len(a) for a in raxes ] for h in haxis: v = values = data.get(h) @@ -401,6 +439,51 @@ def __array_wrap__(self, result): d['copy'] = False return self._constructor(result, **d) + #---------------------------------------------------------------------- + # Comparison methods + + def _indexed_same(self, other): + return all([ getattr(self,a).equals(getattr(other,a)) for a in self._AXIS_ORDERS ]) + + def _compare_constructor(self, other, func): + if not self._indexed_same(other): + raise Exception('Can only compare identically-labeled ' + 'same type objects') + + new_data = {} + for col in getattr(self,self._info_axis): + new_data[col] = func(self[col], other[col]) + + d = self._construct_axes_dict() + d['copy'] = False + return self._constructor(data=new_data, **d) + + # boolean operators + __and__ = _arith_method(operator.and_, '__and__') + __or__ = _arith_method(operator.or_, '__or__') + __xor__ = _arith_method(operator.xor, '__xor__') + + def __neg__(self): + return -1 * self + + def __invert__(self): + return -1 * self + + # Comparison methods + __eq__ = _comp_method(operator.eq, '__eq__') + __ne__ = _comp_method(operator.ne, '__ne__') + __lt__ = _comp_method(operator.lt, '__lt__') + __gt__ = _comp_method(operator.gt, '__gt__') + __le__ = _comp_method(operator.le, '__le__') + __ge__ = _comp_method(operator.ge, '__ge__') + + eq = _comp_method(operator.eq, 'eq') + ne = _comp_method(operator.ne, 'ne') + gt = _comp_method(operator.gt, 'gt') + lt = _comp_method(operator.lt, 'lt') + ge = _comp_method(operator.ge, 'ge') + le = _comp_method(operator.le, 'le') + #---------------------------------------------------------------------- # Magic methods @@ -435,14 +518,14 @@ def __unicode__(self): class_name = str(self.__class__) shape = self.shape - dims = 'Dimensions: %s' % ' x '.join([ "%d 
(%s)" % (s, a) for a,s in zip(self._AXIS_ORDERS,shape) ]) + dims = u'Dimensions: %s' % ' x '.join([ "%d (%s)" % (s, a) for a,s in zip(self._AXIS_ORDERS,shape) ]) def axis_pretty(a): v = getattr(self,a) if len(v) > 0: - return '%s axis: %s to %s' % (a.capitalize(),v[0],v[-1]) + return u'%s axis: %s to %s' % (a.capitalize(),com.pprint_thing(v[0]),com.pprint_thing(v[-1])) else: - return '%s axis: None' % a.capitalize() + return u'%s axis: None' % a.capitalize() output = '\n'.join([class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS]) @@ -496,9 +579,9 @@ def ix(self): return self._ix def _wrap_array(self, arr, axes, copy=False): - items, major, minor = axes - return self._constructor(arr, items=items, major_axis=major, - minor_axis=minor, copy=copy) + d = dict([ (a,ax) for a,ax in zip(self._AXIS_ORDERS,axes) ]) + d['copy'] = False + return self._constructor(arr, **d) fromDict = from_dict @@ -742,7 +825,10 @@ def reindex(self, major=None, minor=None, method=None, if (method is None and not self._is_mixed_type and al <= 3): items = kwargs.get('items') if com._count_not_none(items, major, minor) == 3: - return self._reindex_multi(items, major, minor) + try: + return self._reindex_multi(items, major, minor) + except: + pass if major is not None: result = result._reindex_axis(major, method, al-2, copy) @@ -874,12 +960,12 @@ def _combine(self, other, func, axis=0): elif isinstance(other, DataFrame): return self._combine_frame(other, func, axis=axis) elif np.isscalar(other): - new_values = func(self.values, other) - d = self._construct_axes_dict() - return self._constructor(new_values, **d) + return self._combine_const(other, func) - def __neg__(self): - return -1 * self + def _combine_const(self, other, func): + new_values = func(self.values, other) + d = self._construct_axes_dict() + return self._constructor(new_values, **d) def _combine_frame(self, other, func, axis=0): index, columns = self._get_plane_axes(axis) @@ -1434,8 +1520,8 @@ def update(self, other, 
join='left', overwrite=True, filter_func=None, contain data in the same place. """ - if not isinstance(other, Panel): - other = Panel(other) + if not isinstance(other, self._constructor): + other = self._constructor(other) other = other.reindex(items=self.items) diff --git a/pandas/core/panel4d.py b/pandas/core/panel4d.py index 504111bef5414..fe99d6c0eab78 100644 --- a/pandas/core/panel4d.py +++ b/pandas/core/panel4d.py @@ -1,112 +1,39 @@ """ Panel4D: a 4-d dict like collection of panels """ from pandas.core.panel import Panel +from pandas.core import panelnd import pandas.lib as lib +Panel4D = panelnd.create_nd_panel_factory( + klass_name = 'Panel4D', + axis_orders = [ 'labels','items','major_axis','minor_axis'], + axis_slices = { 'labels' : 'labels', 'items' : 'items', 'major_axis' : 'major_axis', 'minor_axis' : 'minor_axis' }, + slicer = Panel, + axis_aliases = { 'major' : 'major_axis', 'minor' : 'minor_axis' }, + stat_axis = 2) + + + +def panel4d_init(self, data=None, labels=None, items=None, major_axis=None, minor_axis=None, copy=False, dtype=None): + """ + Represents a 4 dimensonal structured + + Parameters + ---------- + data : ndarray (labels x items x major x minor), or dict of Panels + + labels : Index or array-like : axis=0 + items : Index or array-like : axis=1 + major_axis : Index or array-like: axis=2 + minor_axis : Index or array-like: axis=3 + + dtype : dtype, default None + Data type to force, otherwise infer + copy : boolean, default False + Copy data from inputs. 
Only affects DataFrame / 2d ndarray input + """ + self._init_data( data=data, labels=labels, items=items, major_axis=major_axis, minor_axis=minor_axis, + copy=copy, dtype=dtype) +Panel4D.__init__ = panel4d_init -class Panel4D(Panel): - _AXIS_ORDERS = ['labels','items','major_axis','minor_axis'] - _AXIS_NUMBERS = dict([ (a,i) for i, a in enumerate(_AXIS_ORDERS) ]) - _AXIS_ALIASES = { - 'major' : 'major_axis', - 'minor' : 'minor_axis' - } - _AXIS_NAMES = dict([ (i,a) for i, a in enumerate(_AXIS_ORDERS) ]) - _AXIS_SLICEMAP = { - 'items' : 'items', - 'major_axis' : 'major_axis', - 'minor_axis' : 'minor_axis' - } - _AXIS_LEN = len(_AXIS_ORDERS) - - # major - _default_stat_axis = 2 - - # info axis - _het_axis = 0 - _info_axis = _AXIS_ORDERS[_het_axis] - - labels = lib.AxisProperty(0) - items = lib.AxisProperty(1) - major_axis = lib.AxisProperty(2) - minor_axis = lib.AxisProperty(3) - - _constructor_sliced = Panel - - def __init__(self, data=None, labels=None, items=None, major_axis=None, minor_axis=None, copy=False, dtype=None): - """ - Represents a 4 dimensonal structured - - Parameters - ---------- - data : ndarray (labels x items x major x minor), or dict of Panels - - labels : Index or array-like : axis=0 - items : Index or array-like : axis=1 - major_axis : Index or array-like: axis=2 - minor_axis : Index or array-like: axis=3 - - dtype : dtype, default None - Data type to force, otherwise infer - copy : boolean, default False - Copy data from inputs. 
Only affects DataFrame / 2d ndarray input - """ - self._init_data( data=data, labels=labels, items=items, major_axis=major_axis, minor_axis=minor_axis, - copy=copy, dtype=dtype) - - def _get_plane_axes(self, axis): - axis = self._get_axis_name(axis) - - if axis == 'major_axis': - items = self.labels - major = self.items - minor = self.minor_axis - elif axis == 'minor_axis': - items = self.labels - major = self.items - minor = self.major_axis - elif axis == 'items': - items = self.labels - major = self.major_axis - minor = self.minor_axis - elif axis == 'labels': - items = self.items - major = self.major_axis - minor = self.minor_axis - - return items, major, minor - - def _combine(self, other, func, axis=0): - if isinstance(other, Panel4D): - return self._combine_panel4d(other, func) - return super(Panel4D, self)._combine(other, func, axis=axis) - - def _combine_panel4d(self, other, func): - labels = self.labels + other.labels - items = self.items + other.items - major = self.major_axis + other.major_axis - minor = self.minor_axis + other.minor_axis - - # could check that everything's the same size, but forget it - this = self.reindex(labels=labels, items=items, major=major, minor=minor) - other = other.reindex(labels=labels, items=items, major=major, minor=minor) - - result_values = func(this.values, other.values) - - return self._constructor(result_values, labels, items, major, minor) - - def join(self, other, how='left', lsuffix='', rsuffix=''): - if isinstance(other, Panel4D): - join_major, join_minor = self._get_join_index(other, how) - this = self.reindex(major=join_major, minor=join_minor) - other = other.reindex(major=join_major, minor=join_minor) - merged_data = this._data.merge(other._data, lsuffix, rsuffix) - return self._constructor(merged_data) - return super(Panel4D, self).join(other=other,how=how,lsuffix=lsuffix,rsuffix=rsuffix) - - ### remove operations #### - def to_frame(self, *args, **kwargs): - raise NotImplementedError - def to_excel(self, 
*args, **kwargs): - raise NotImplementedError diff --git a/pandas/core/panelnd.py b/pandas/core/panelnd.py index e4638750aa1b2..22f6dac6b640c 100644 --- a/pandas/core/panelnd.py +++ b/pandas/core/panelnd.py @@ -46,7 +46,7 @@ def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_a for i, a in enumerate(axis_orders): setattr(klass,a,lib.AxisProperty(i)) - # define the __init__ + #### define the methods #### def __init__(self, *args, **kwargs): if not (kwargs.get('data') or len(args)): raise Exception("must supply at least a data argument to [%s]" % klass_name) @@ -57,8 +57,8 @@ def __init__(self, *args, **kwargs): self._init_data( *args, **kwargs) klass.__init__ = __init__ - # define _get_place_axes def _get_plane_axes(self, axis): + axis = self._get_axis_name(axis) index = self._AXIS_ORDERS.index(axis) @@ -66,18 +66,40 @@ def _get_plane_axes(self, axis): if index: planes.extend(self._AXIS_ORDERS[0:index]) if index != self._AXIS_LEN: - planes.extend(self._AXIS_ORDERS[index:]) - - return planes - klass._get_plane_axes - - # remove these operations - def to_frame(self, *args, **kwargs): - raise NotImplementedError - klass.to_frame = to_frame - def to_excel(self, *args, **kwargs): - raise NotImplementedError - klass.to_excel = to_excel + planes.extend(self._AXIS_ORDERS[index+1:]) + + return [ getattr(self,p) for p in planes ] + klass._get_plane_axes = _get_plane_axes + + def _combine(self, other, func, axis=0): + if isinstance(other, klass): + return self._combine_with_constructor(other, func) + return super(klass, self)._combine(other, func, axis=axis) + klass._combine = _combine + + def _combine_with_constructor(self, other, func): + + # combine labels to form new axes + new_axes = [] + for a in self._AXIS_ORDERS: + new_axes.append(getattr(self,a) + getattr(other,a)) + + # reindex: could check that everything's the same size, but forget it + d = dict([ (a,ax) for a,ax in zip(self._AXIS_ORDERS,new_axes) ]) + d['copy'] = False + this = 
self.reindex(**d) + other = other.reindex(**d) + + result_values = func(this.values, other.values) + + return self._constructor(result_values, **d) + klass._combine_with_constructor = _combine_with_constructor + + # set as NonImplemented operations which we don't support + for f in ['to_frame','to_excel','to_sparse','groupby','join','_get_join_index']: + def func(self, *args, **kwargs): + raise NotImplementedError + setattr(klass,f,func) return klass diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 1d8b40d34dba2..cb4cc9eef5c88 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -642,6 +642,56 @@ def _check_view(self, indexer, comp): self.assert_((obj.values == 0).all()) comp(cp.ix[indexer].reindex_like(obj), obj) + def test_logical_with_nas(self): + d = Panel({ 'ItemA' : {'a': [np.nan, False] }, 'ItemB' : { 'a': [True, True] } }) + + result = d['ItemA'] | d['ItemB'] + expected = DataFrame({ 'a' : [np.nan, True] }) + assert_frame_equal(result, expected) + + result = d['ItemA'].fillna(False) | d['ItemB'] + expected = DataFrame({ 'a' : [True, True] }, dtype=object) + assert_frame_equal(result, expected) + + def test_neg(self): + # what to do? 
+ assert_panel_equal(-self.panel, -1 * self.panel) + + def test_invert(self): + assert_panel_equal(-(self.panel < 0), ~(self.panel <0)) + + def test_comparisons(self): + p1 = tm.makePanel() + p2 = tm.makePanel() + + tp = p1.reindex(items = p1.items + ['foo']) + df = p1[p1.items[0]] + + def test_comp(func): + + # versus same index + result = func(p1, p2) + self.assert_(np.array_equal(result.values, + func(p1.values, p2.values))) + + # versus non-indexed same objs + self.assertRaises(Exception, func, p1, tp) + + # versus different objs + self.assertRaises(Exception, func, p1, df) + + # versus scalar + result3 = func(self.panel, 0) + self.assert_(np.array_equal(result3.values, + func(self.panel.values, 0))) + + test_comp(operator.eq) + test_comp(operator.ne) + test_comp(operator.lt) + test_comp(operator.gt) + test_comp(operator.ge) + test_comp(operator.le) + def test_get_value(self): for item in self.panel.items: for mjr in self.panel.major_axis[::2]: diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 8340d55604e24..bdfc4933f31ab 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -368,6 +368,51 @@ def test_setitem(self): self.panel4d['lP'] = self.panel4d['l1'] > 0 self.assert_(self.panel4d['lP'].values.dtype == np.bool_) + def test_comparisons(self): + p1 = tm.makePanel4D() + p2 = tm.makePanel4D() + + tp = p1.reindex(labels = p1.labels + ['foo']) + p = p1[p1.labels[0]] + + def test_comp(func): + result = func(p1, p2) + self.assert_(np.array_equal(result.values, + func(p1.values, p2.values))) + + # versus non-indexed same objs + self.assertRaises(Exception, func, p1, tp) + + # versus different objs + self.assertRaises(Exception, func, p1, p) + + result3 = func(self.panel4d, 0) + self.assert_(np.array_equal(result3.values, + func(self.panel4d.values, 0))) + + test_comp(operator.eq) + test_comp(operator.ne) + test_comp(operator.lt) + test_comp(operator.gt) + test_comp(operator.ge) + test_comp(operator.le) + + def 
test_setitem_ndarray(self): + raise nose.SkipTest + # from pandas import DateRange, datetools + + # timeidx = DateRange(start=datetime(2009,1,1), + # end=datetime(2009,12,31), + # offset=datetools.MonthEnd()) + # lons_coarse = np.linspace(-177.5, 177.5, 72) + # lats_coarse = np.linspace(-87.5, 87.5, 36) + # P = Panel(items=timeidx, major_axis=lons_coarse, minor_axis=lats_coarse) + # data = np.random.randn(72*36).reshape((72,36)) + # key = datetime(2009,2,28) + # P[key] = data# + + # assert_almost_equal(P[key].values, data) + def test_major_xs(self): ref = self.panel4d['l1']['ItemA']