diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 33539e8d294c1..673fb45d98f08 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3232,9 +3232,12 @@ def first( self, numeric_only: bool = False, min_count: int = -1, skipna: bool = True ) -> NDFrameT: """ - Compute the first entry of each column within each group. + Compute the first non-null entry of each column within each group. - Defaults to skipping NA elements. + This method operates column-wise, returning the first non-null value + in each column for every group. Unlike `nth(0)`, which returns the + first row (even if it contains nulls), `first()` skips over NA/null + values in each column independently. Parameters ---------- @@ -3251,15 +3254,15 @@ def first( Returns ------- Series or DataFrame - First values within each group. + First non-null values within each group, selected independently per column. See Also -------- - DataFrame.groupby : Apply a function groupby to each row or column of a - DataFrame. - core.groupby.DataFrameGroupBy.last : Compute the last non-null entry - of each column. - core.groupby.DataFrameGroupBy.nth : Take the nth row from each group. + DataFrame.groupby : Group DataFrame using a mapper or by a Series of columns. + Series.groupby : Group Series using a mapper or by a Series of values. + GroupBy.nth : Take the nth row from each group. + GroupBy.head : Return the first `n` rows from each group. + GroupBy.last : Compute the last non-null entry of each column. Examples -------- @@ -3272,23 +3275,37 @@ def first( ... ) ... 
) >>> df["D"] = pd.to_datetime(df["D"]) + >>> df.groupby("A").first() - B C D + B C D A 1 5.0 1 2000-03-11 3 6.0 3 2000-03-13 + + >>> df.groupby("A").nth(0) + A B C D + 0 1 NaN 1 2000-03-11 + 2 3 6.0 3 2000-03-13 + + >>> df.groupby("A").head(1) + A B C D + 0 1 NaN 1 2000-03-11 + 2 3 6.0 3 2000-03-13 + >>> df.groupby("A").first(min_count=2) B C D A - 1 NaN 1.0 2000-03-11 - 3 NaN NaN NaT + 1 NaN 1.0 2000-03-11 + 3 NaN NaN NaT + >>> df.groupby("A").first(numeric_only=True) - B C + B C A 1 5.0 1 3 6.0 3 """ + def first_compat(obj: NDFrameT): def first(x: Series): """Helper function for first item that isn't NA."""