Skip to content

Commit d22adf6

Browse files
authored
Adjust Series specific tests for string option (#55538)
1 parent 6e5e393 commit d22adf6

21 files changed

+242
-95
lines changed

pandas/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2024,6 +2024,14 @@ def warn_copy_on_write() -> bool:
20242024
)
20252025

20262026

2027+
@pytest.fixture
2028+
def using_infer_string() -> bool:
2029+
"""
2030+
Fixture to check if infer_string is enabled.
2031+
"""
2032+
return pd.options.future.infer_string
2033+
2034+
20272035
warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
20282036
if zoneinfo is not None:
20292037
warsaws.append(zoneinfo.ZoneInfo("Europe/Warsaw")) # type: ignore[arg-type]

pandas/tests/series/accessors/test_dt_accessor.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -586,13 +586,15 @@ def test_strftime_dt64_days(self):
586586
# dtype may be S10 or U10 depending on python version
587587
tm.assert_index_equal(result, expected)
588588

589-
def test_strftime_period_days(self):
589+
def test_strftime_period_days(self, using_infer_string):
590590
period_index = period_range("20150301", periods=5)
591591
result = period_index.strftime("%Y/%m/%d")
592592
expected = Index(
593593
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
594594
dtype="=U10",
595595
)
596+
if using_infer_string:
597+
expected = expected.astype("string[pyarrow_numpy]")
596598
tm.assert_index_equal(result, expected)
597599

598600
def test_strftime_dt64_microsecond_resolution(self):

pandas/tests/series/indexing/test_delitem.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,16 @@ def test_delitem(self):
3131
del s[0]
3232
tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))
3333

34-
def test_delitem_object_index(self):
34+
def test_delitem_object_index(self, using_infer_string):
3535
# Index(dtype=object)
36-
s = Series(1, index=["a"])
36+
dtype = "string[pyarrow_numpy]" if using_infer_string else object
37+
s = Series(1, index=Index(["a"], dtype=dtype))
3738
del s["a"]
38-
tm.assert_series_equal(
39-
s, Series(dtype="int64", index=Index([], dtype="object"))
40-
)
39+
tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))
4140
s["a"] = 1
42-
tm.assert_series_equal(s, Series(1, index=["a"]))
41+
tm.assert_series_equal(s, Series(1, index=Index(["a"], dtype=dtype)))
4342
del s["a"]
44-
tm.assert_series_equal(
45-
s, Series(dtype="int64", index=Index([], dtype="object"))
46-
)
43+
tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))
4744

4845
def test_delitem_missing_key(self):
4946
# empty

pandas/tests/series/indexing/test_getitem.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def test_getitem_unrecognized_scalar(self):
7171
def test_getitem_negative_out_of_bounds(self):
7272
ser = Series(["a"] * 10, index=["a"] * 10)
7373

74-
msg = "index -11 is out of bounds for axis 0 with size 10"
74+
msg = "index -11 is out of bounds for axis 0 with size 10|index out of bounds"
7575
warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
7676
with pytest.raises(IndexError, match=msg):
7777
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
@@ -363,7 +363,9 @@ def test_getitem_no_matches(self, box):
363363
key = Series(["C"], dtype=object)
364364
key = box(key)
365365

366-
msg = r"None of \[Index\(\['C'\], dtype='object'\)\] are in the \[index\]"
366+
msg = (
367+
r"None of \[Index\(\['C'\], dtype='object|string'\)\] are in the \[index\]"
368+
)
367369
with pytest.raises(KeyError, match=msg):
368370
ser[key]
369371

@@ -437,7 +439,7 @@ def test_getitem_boolean_empty(self):
437439

438440
# GH#5877
439441
# indexing with empty series
440-
ser = Series(["A", "B"])
442+
ser = Series(["A", "B"], dtype=object)
441443
expected = Series(dtype=object, index=Index([], dtype="int64"))
442444
result = ser[Series([], dtype=object)]
443445
tm.assert_series_equal(result, expected)

pandas/tests/series/indexing/test_setitem.py

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
date,
33
datetime,
44
)
5+
from decimal import Decimal
56

67
import numpy as np
78
import pytest
@@ -175,7 +176,8 @@ class TestSetitemScalarIndexer:
175176
def test_setitem_negative_out_of_bounds(self):
176177
ser = Series(["a"] * 10, index=["a"] * 10)
177178

178-
msg = "index -11 is out of bounds for axis 0 with size 10"
179+
# string index falls back to positional
180+
msg = "index -11|-1 is out of bounds for axis 0 with size 10"
179181
warn_msg = "Series.__setitem__ treating keys as positions is deprecated"
180182
with pytest.raises(IndexError, match=msg):
181183
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
@@ -527,8 +529,12 @@ def test_setitem_empty_series_timestamp_preserves_dtype(self):
527529
Timedelta("9 days").to_pytimedelta(),
528530
],
529531
)
530-
def test_append_timedelta_does_not_cast(self, td):
532+
def test_append_timedelta_does_not_cast(self, td, using_infer_string, request):
531533
# GH#22717 inserting a Timedelta should _not_ cast to int64
534+
if using_infer_string and not isinstance(td, Timedelta):
535+
# TODO: GH#56010
536+
request.applymarker(pytest.mark.xfail(reason="inferred as string"))
537+
532538
expected = Series(["x", td], index=[0, "td"], dtype=object)
533539

534540
ser = Series(["x"])
@@ -595,13 +601,21 @@ def test_setitem_enlarge_with_na(
595601
expected = Series(expected_values, dtype=target_dtype)
596602
tm.assert_series_equal(ser, expected)
597603

598-
def test_setitem_enlargement_object_none(self, nulls_fixture):
604+
def test_setitem_enlargement_object_none(self, nulls_fixture, using_infer_string):
599605
# GH#48665
600606
ser = Series(["a", "b"])
601607
ser[3] = nulls_fixture
602-
expected = Series(["a", "b", nulls_fixture], index=[0, 1, 3])
608+
dtype = (
609+
"string[pyarrow_numpy]"
610+
if using_infer_string and not isinstance(nulls_fixture, Decimal)
611+
else object
612+
)
613+
expected = Series(["a", "b", nulls_fixture], index=[0, 1, 3], dtype=dtype)
603614
tm.assert_series_equal(ser, expected)
604-
assert ser[3] is nulls_fixture
615+
if using_infer_string:
616+
ser[3] is np.nan
617+
else:
618+
assert ser[3] is nulls_fixture
605619

606620

607621
def test_setitem_scalar_into_readonly_backing_data():
@@ -845,20 +859,28 @@ def test_series_where(self, obj, key, expected, warn, val, is_inplace):
845859

846860
self._check_inplace(is_inplace, orig, arr, obj)
847861

848-
def test_index_where(self, obj, key, expected, warn, val):
862+
def test_index_where(self, obj, key, expected, warn, val, using_infer_string):
849863
mask = np.zeros(obj.shape, dtype=bool)
850864
mask[key] = True
851865

852-
res = Index(obj).where(~mask, val)
853-
expected_idx = Index(expected, dtype=expected.dtype)
854-
tm.assert_index_equal(res, expected_idx)
866+
if using_infer_string and obj.dtype == object:
867+
with pytest.raises(TypeError, match="Scalar must"):
868+
Index(obj).where(~mask, val)
869+
else:
870+
res = Index(obj).where(~mask, val)
871+
expected_idx = Index(expected, dtype=expected.dtype)
872+
tm.assert_index_equal(res, expected_idx)
855873

856-
def test_index_putmask(self, obj, key, expected, warn, val):
874+
def test_index_putmask(self, obj, key, expected, warn, val, using_infer_string):
857875
mask = np.zeros(obj.shape, dtype=bool)
858876
mask[key] = True
859877

860-
res = Index(obj).putmask(mask, val)
861-
tm.assert_index_equal(res, Index(expected, dtype=expected.dtype))
878+
if using_infer_string and obj.dtype == object:
879+
with pytest.raises(TypeError, match="Scalar must"):
880+
Index(obj).putmask(mask, val)
881+
else:
882+
res = Index(obj).putmask(mask, val)
883+
tm.assert_index_equal(res, Index(expected, dtype=expected.dtype))
862884

863885

864886
@pytest.mark.parametrize(

pandas/tests/series/indexing/test_where.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_pyarrow_string_dtype
5+
46
from pandas.core.dtypes.common import is_integer
57

68
import pandas as pd
@@ -230,6 +232,7 @@ def test_where_ndframe_align():
230232
tm.assert_series_equal(out, expected)
231233

232234

235+
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't set ints into string")
233236
def test_where_setitem_invalid():
234237
# GH 2702
235238
# make sure correct exceptions are raised on invalid list assignment

pandas/tests/series/methods/test_astype.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def test_astype_dict_like(self, dtype_class):
7676

7777
dt1 = dtype_class({"abc": str})
7878
result = ser.astype(dt1)
79-
expected = Series(["0", "2", "4", "6", "8"], name="abc")
79+
expected = Series(["0", "2", "4", "6", "8"], name="abc", dtype=object)
8080
tm.assert_series_equal(result, expected)
8181

8282
dt2 = dtype_class({"abc": "float64"})
@@ -170,10 +170,12 @@ def test_astype_empty_constructor_equality(self, dtype):
170170
Series([string.digits * 10, rand_str(63), rand_str(64), np.nan, 1.0]),
171171
],
172172
)
173-
def test_astype_str_map(self, dtype, series):
173+
def test_astype_str_map(self, dtype, series, using_infer_string):
174174
# see GH#4405
175175
result = series.astype(dtype)
176176
expected = series.map(str)
177+
if using_infer_string:
178+
expected = expected.astype(object)
177179
tm.assert_series_equal(result, expected)
178180

179181
def test_astype_float_to_period(self):
@@ -283,13 +285,13 @@ def test_astype_str_cast_dt64(self):
283285
ts = Series([Timestamp("2010-01-04 00:00:00")])
284286
res = ts.astype(str)
285287

286-
expected = Series(["2010-01-04"])
288+
expected = Series(["2010-01-04"], dtype=object)
287289
tm.assert_series_equal(res, expected)
288290

289291
ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
290292
res = ts.astype(str)
291293

292-
expected = Series(["2010-01-04 00:00:00-05:00"])
294+
expected = Series(["2010-01-04 00:00:00-05:00"], dtype=object)
293295
tm.assert_series_equal(res, expected)
294296

295297
def test_astype_str_cast_td64(self):
@@ -298,7 +300,7 @@ def test_astype_str_cast_td64(self):
298300
td = Series([Timedelta(1, unit="d")])
299301
ser = td.astype(str)
300302

301-
expected = Series(["1 days"])
303+
expected = Series(["1 days"], dtype=object)
302304
tm.assert_series_equal(ser, expected)
303305

304306
def test_dt64_series_astype_object(self):
@@ -345,7 +347,7 @@ def test_astype_from_float_to_str(self, dtype):
345347
# https://github.com/pandas-dev/pandas/issues/36451
346348
ser = Series([0.1], dtype=dtype)
347349
result = ser.astype(str)
348-
expected = Series(["0.1"])
350+
expected = Series(["0.1"], dtype=object)
349351
tm.assert_series_equal(result, expected)
350352

351353
@pytest.mark.parametrize(
@@ -416,7 +418,7 @@ def test_astype_cast_object_int(self):
416418

417419
tm.assert_series_equal(result, Series(np.arange(1, 5)))
418420

419-
def test_astype_unicode(self):
421+
def test_astype_unicode(self, using_infer_string):
420422
# see GH#7758: A bit of magic is required to set
421423
# default encoding to utf-8
422424
digits = string.digits
@@ -433,12 +435,14 @@ def test_astype_unicode(self):
433435
item = "野菜食べないとやばい"
434436
ser = Series([item.encode()])
435437
result = ser.astype(np.str_)
436-
expected = Series([item])
438+
expected = Series([item], dtype=object)
437439
tm.assert_series_equal(result, expected)
438440

439441
for ser in test_series:
440442
res = ser.astype(np.str_)
441443
expec = ser.map(str)
444+
if using_infer_string:
445+
expec = expec.astype(object)
442446
tm.assert_series_equal(res, expec)
443447

444448
# Restore the former encoding
@@ -534,12 +538,12 @@ def test_astype_categorical_to_other(self):
534538
expected = ser
535539
tm.assert_series_equal(ser.astype("category"), expected)
536540
tm.assert_series_equal(ser.astype(CategoricalDtype()), expected)
537-
msg = r"Cannot cast object dtype to float64"
541+
msg = r"Cannot cast object|string dtype to float64"
538542
with pytest.raises(ValueError, match=msg):
539543
ser.astype("float64")
540544

541545
cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
542-
exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"])
546+
exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"], dtype=object)
543547
tm.assert_series_equal(cat.astype("str"), exp)
544548
s2 = Series(Categorical(["1", "2", "3", "4"]))
545549
exp2 = Series([1, 2, 3, 4]).astype("int")

pandas/tests/series/methods/test_combine_first.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def test_combine_first(self):
5353
# mixed types
5454
index = tm.makeStringIndex(20)
5555
floats = Series(np.random.default_rng(2).standard_normal(20), index=index)
56-
strings = Series(tm.makeStringIndex(10), index=index[::2])
56+
strings = Series(tm.makeStringIndex(10), index=index[::2], dtype=object)
5757

5858
combined = strings.combine_first(floats)
5959

pandas/tests/series/methods/test_convert_dtypes.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ def test_convert_dtypes(
186186
self,
187187
test_cases,
188188
params,
189+
using_infer_string,
189190
):
190191
data, maindtype, expected_default, expected_other = test_cases
191192
if (
@@ -219,6 +220,16 @@ def test_convert_dtypes(
219220
for spec, dtype in expected_other.items():
220221
if all(params_dict[key] is val for key, val in zip(spec[::2], spec[1::2])):
221222
expected_dtype = dtype
223+
if (
224+
using_infer_string
225+
and expected_default == "string"
226+
and expected_dtype == object
227+
and params[0]
228+
and not params[1]
229+
):
230+
# When the default conversion would give "string", infer_objects alone
231+
# already infers the string dtype if the infer_string option is enabled
232+
expected_dtype = "string[pyarrow_numpy]"
222233

223234
expected = pd.Series(data, dtype=expected_dtype)
224235
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)