@@ -989,6 +989,31 @@ Note that ``df.groupby('A').colname.std().`` is more efficient than
989
989
is only interesting over one column (here ``colname``), it may be filtered
990
990
*before* applying the aggregation function.
991
991
992
+ .. note::
993
+ Decimal and object columns are also treated as "nuisance" columns: groupby automatically excludes them from aggregation functions.
994
+
995
+ If you wish to include decimal or object columns in an aggregation alongside non-nuisance data types, you must request them explicitly, for example by passing each column to ``.agg``.
996
+
997
+ .. ipython:: python
998
+
999
+ from decimal import Decimal
1000
+ dec = pd.DataFrame(
1001
+ {' id' : [123 , 456 , 123 , 456 ],
1002
+ ' int_column' : [1 , 2 , 3 , 4 ],
1003
+ ' dec_column' : [Decimal(' 0.50' ), Decimal(' 0.15' ), Decimal(' 0.25' ), Decimal(' 0.40' )]
1004
+ },
1005
+ columns = [' id' ,' int_column' ,' dec_column' ]
1006
+ )
1007
+
1008
+ # Decimal columns can be summed explicitly by themselves...
1009
+ dec.groupby([' id' ], as_index = False )[' dec_column' ].sum()
1010
+
1011
+ # ...but cannot be combined with standard data types or they will be excluded
1012
+ dec.groupby([' id' ], as_index = False )[[' int_column' ,' dec_column' ]].sum()
1013
+
1014
+ # Use .agg function to aggregate over standard and "nuisance" data types at the same time
1015
+ dec.groupby([' id' ], as_index = False ).agg({' int_column' : ' sum' , ' dec_column' : ' sum' })
1016
+
992
1017
.. _groupby.missing:
993
1018
994
1019
NA and NaT group handling
0 commit comments