From c1de316c0e77d084af00da5b8135d23bd725ee4f Mon Sep 17 00:00:00 2001 From: ericcht Date: Wed, 23 Apr 2025 14:38:17 -0400 Subject: [PATCH 1/2] Update groupby.py --- pandas/core/groupby/groupby.py | 119 +++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 51 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 33539e8d294c1..c6628116739d0 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3231,63 +3231,80 @@ def max( def first( self, numeric_only: bool = False, min_count: int = -1, skipna: bool = True ) -> NDFrameT: - """ - Compute the first entry of each column within each group. + """ +Compute the first non-null entry of each column within each group. - Defaults to skipping NA elements. +This method operates column-wise, returning the first non-null value +in each column for every group. Unlike `nth(0)`, which returns the +first row (even if it contains nulls), `first()` skips over NA/null +values in each column independently. - Parameters - ---------- - numeric_only : bool, default False - Include only float, int, boolean columns. - min_count : int, default -1 - The required number of valid values to perform the operation. If fewer - than ``min_count`` valid values are present the result will be NA. - skipna : bool, default True - Exclude NA/null values. If an entire group is NA, the result will be NA. +Parameters +---------- +numeric_only : bool, default False + Include only float, int, boolean columns. +min_count : int, default -1 + The required number of valid values to perform the operation. If fewer + than ``min_count`` valid values are present the result will be NA. +skipna : bool, default True + Exclude NA/null values. If an entire group is NA, the result will be NA. - .. versionadded:: 2.2.1 + .. versionadded:: 2.2.1 - Returns - ------- - Series or DataFrame - First values within each group. 
+Returns +------- +Series or DataFrame + First non-null values within each group, selected independently per column. - See Also - -------- - DataFrame.groupby : Apply a function groupby to each row or column of a - DataFrame. - core.groupby.DataFrameGroupBy.last : Compute the last non-null entry - of each column. - core.groupby.DataFrameGroupBy.nth : Take the nth row from each group. +See Also +-------- +DataFrame.groupby : Group DataFrame using a mapper or by a Series of columns. +Series.groupby : Group Series using a mapper or by a Series of values. +GroupBy.nth : Take the nth row from each group. +GroupBy.head : Return the first `n` rows from each group. +GroupBy.last : Compute the last non-null entry of each column. - Examples - -------- - >>> df = pd.DataFrame( - ... dict( - ... A=[1, 1, 3], - ... B=[None, 5, 6], - ... C=[1, 2, 3], - ... D=["3/11/2000", "3/12/2000", "3/13/2000"], - ... ) - ... ) - >>> df["D"] = pd.to_datetime(df["D"]) - >>> df.groupby("A").first() - B C D - A - 1 5.0 1 2000-03-11 - 3 6.0 3 2000-03-13 - >>> df.groupby("A").first(min_count=2) - B C D - A - 1 NaN 1.0 2000-03-11 - 3 NaN NaN NaT - >>> df.groupby("A").first(numeric_only=True) - B C - A - 1 5.0 1 - 3 6.0 3 - """ +Examples +-------- +>>> df = pd.DataFrame( +... dict( +... A=[1, 1, 3], +... B=[None, 5, 6], +... C=[1, 2, 3], +... D=["3/11/2000", "3/12/2000", "3/13/2000"], +... ) +... 
) +>>> df["D"] = pd.to_datetime(df["D"]) + +>>> df.groupby("A").first() + B C D +A +1 5.0 1 2000-03-11 +3 6.0 3 2000-03-13 + +>>> df.groupby("A").nth(0) + B C D +A +1 NaN 1 2000-03-11 +3 6.0 3 2000-03-13 + +>>> df.groupby("A").head(1) + A B C D +0 1 NaN 1 2000-03-11 +2 3 6.0 3 2000-03-13 + +>>> df.groupby("A").first(min_count=2) + B C D +A +1 NaN 1.0 2000-03-11 +3 NaN NaN NaT + +>>> df.groupby("A").first(numeric_only=True) + B C +A +1 5.0 1 +3 6.0 3 +""" def first_compat(obj: NDFrameT): def first(x: Series): From b4d068f193c481504d46afbfdb4852bd84d534da Mon Sep 17 00:00:00 2001 From: ericcht Date: Wed, 23 Apr 2025 14:48:06 -0400 Subject: [PATCH 2/2] Update groupby.py --- pandas/core/groupby/groupby.py | 137 +++++++++++++++++---------------- 1 file changed, 69 insertions(+), 68 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c6628116739d0..673fb45d98f08 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3231,80 +3231,81 @@ def max( def first( self, numeric_only: bool = False, min_count: int = -1, skipna: bool = True ) -> NDFrameT: - """ -Compute the first non-null entry of each column within each group. + """ + Compute the first non-null entry of each column within each group. -This method operates column-wise, returning the first non-null value -in each column for every group. Unlike `nth(0)`, which returns the -first row (even if it contains nulls), `first()` skips over NA/null -values in each column independently. + This method operates column-wise, returning the first non-null value + in each column for every group. Unlike `nth(0)`, which returns the + first row (even if it contains nulls), `first()` skips over NA/null + values in each column independently. -Parameters ----------- -numeric_only : bool, default False - Include only float, int, boolean columns. -min_count : int, default -1 - The required number of valid values to perform the operation. 
If fewer - than ``min_count`` valid values are present the result will be NA. -skipna : bool, default True - Exclude NA/null values. If an entire group is NA, the result will be NA. + Parameters + ---------- + numeric_only : bool, default False + Include only float, int, boolean columns. + min_count : int, default -1 + The required number of valid values to perform the operation. If fewer + than ``min_count`` valid values are present the result will be NA. + skipna : bool, default True + Exclude NA/null values. If an entire group is NA, the result will be NA. - .. versionadded:: 2.2.1 + .. versionadded:: 2.2.1 -Returns -------- -Series or DataFrame - First non-null values within each group, selected independently per column. + Returns + ------- + Series or DataFrame + First non-null values within each group, selected independently per column. -See Also --------- -DataFrame.groupby : Group DataFrame using a mapper or by a Series of columns. -Series.groupby : Group Series using a mapper or by a Series of values. -GroupBy.nth : Take the nth row from each group. -GroupBy.head : Return the first `n` rows from each group. -GroupBy.last : Compute the last non-null entry of each column. + See Also + -------- + DataFrame.groupby : Group DataFrame using a mapper or by a Series of columns. + Series.groupby : Group Series using a mapper or by a Series of values. + GroupBy.nth : Take the nth row from each group. + GroupBy.head : Return the first `n` rows from each group. + GroupBy.last : Compute the last non-null entry of each column. + + Examples + -------- + >>> df = pd.DataFrame( + ... dict( + ... A=[1, 1, 3], + ... B=[None, 5, 6], + ... C=[1, 2, 3], + ... D=["3/11/2000", "3/12/2000", "3/13/2000"], + ... ) + ... 
) + >>> df["D"] = pd.to_datetime(df["D"]) + + >>> df.groupby("A").first() + B C D + A + 1 5.0 1 2000-03-11 + 3 6.0 3 2000-03-13 + + >>> df.groupby("A").nth(0) + A B C D + 0 1 NaN 1 2000-03-11 + 2 3 6.0 3 2000-03-13 + + >>> df.groupby("A").head(1) + A B C D + 0 1 NaN 1 2000-03-11 + 2 3 6.0 3 2000-03-13 + + >>> df.groupby("A").first(min_count=2) + B C D + A + 1 NaN 1.0 2000-03-11 + 3 NaN NaN NaT + + >>> df.groupby("A").first(numeric_only=True) + B C + A + 1 5.0 1 + 3 6.0 3 + """ -Examples --------- ->>> df = pd.DataFrame( -... dict( -... A=[1, 1, 3], -... B=[None, 5, 6], -... C=[1, 2, 3], -... D=["3/11/2000", "3/12/2000", "3/13/2000"], -... ) -... ) ->>> df["D"] = pd.to_datetime(df["D"]) - ->>> df.groupby("A").first() - B C D -A -1 5.0 1 2000-03-11 -3 6.0 3 2000-03-13 - ->>> df.groupby("A").nth(0) - B C D -A -1 NaN 1 2000-03-11 -3 6.0 3 2000-03-13 - ->>> df.groupby("A").head(1) - A B C D -0 1 NaN 1 2000-03-11 -2 3 6.0 3 2000-03-13 - ->>> df.groupby("A").first(min_count=2) - B C D -A -1 NaN 1.0 2000-03-11 -3 NaN NaN NaT - ->>> df.groupby("A").first(numeric_only=True) - B C -A -1 5.0 1 -3 6.0 3 -""" def first_compat(obj: NDFrameT): def first(x: Series):