Skip to content

Commit acb0b85

Browse files
authored
Merge branch 'main' into fix-get-level-values-special-names
2 parents 8d68063 + 08d21d7 commit acb0b85

File tree

10 files changed

+130
-22
lines changed

10 files changed

+130
-22
lines changed

.github/workflows/wheels.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
name: Wheel builder
1414

1515
on:
16+
release:
17+
types: [published]
1618
schedule:
1719
# 3:27 UTC every day
1820
- cron: "27 3 * * *"
@@ -216,3 +218,41 @@ jobs:
216218
source ci/upload_wheels.sh
217219
set_upload_vars
218220
upload_wheels
221+
222+
publish:
223+
if: >
224+
github.repository == 'pandas-dev/pandas' &&
225+
github.event_name == 'release' &&
226+
startsWith(github.ref, 'refs/tags/v')
227+
228+
needs:
229+
- build_sdist
230+
- build_wheels
231+
232+
runs-on: ubuntu-latest
233+
234+
environment:
235+
name: pypi
236+
permissions:
237+
id-token: write # OIDC for Trusted Publishing
238+
contents: read
239+
240+
steps:
241+
- name: Download all artefacts
242+
uses: actions/download-artifact@v4
243+
with:
244+
path: dist # everything lands in ./dist/**
245+
246+
- name: Collect files
247+
run: |
248+
mkdir -p upload
249+
# skip any wheel that contains 'pyodide'
250+
find dist -name '*pyodide*.whl' -prune -o \
251+
-name '*.whl' -exec mv {} upload/ \;
252+
find dist -name '*.tar.gz' -exec mv {} upload/ \;
253+
254+
- name: Publish to **PyPI** (Trusted Publishing)
255+
uses: pypa/gh-action-pypi-publish@release/v1
256+
with:
257+
packages-dir: upload
258+
skip-existing: true

doc/source/development/maintaining.rst

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -451,9 +451,10 @@ which will be triggered when the tag is pushed.
451451
- Set as the latest release: Leave checked, unless releasing a patch release for an older version
452452
(e.g. releasing 1.4.5 after 1.5 has been released)
453453

454-
5. Upload wheels to PyPI::
455-
456-
twine upload pandas/dist/pandas-<version>*.{whl,tar.gz} --skip-existing
454+
5. Verify wheels are uploaded automatically by GitHub Actions
455+
via `Trusted Publishing <https://docs.pypi.org/trusted-publishers/>`__
456+
when the GitHub `Release <https://docs.github.com/en/repositories/releasing-projects-on-github/about-releases>`__
457+
is published. Do not run ``twine upload`` manually.
457458

458459
6. The GitHub release will after some hours trigger an
459460
`automated conda-forge PR <https://github.com/conda-forge/pandas-feedstock/pulls>`_.

doc/source/whatsnew/v2.3.3.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,24 @@ Most changes in this release are related to :class:`StringDtype` which will
1818
become the default string dtype in pandas 3.0. See
1919
:ref:`whatsnew_230.upcoming_changes` for more details.
2020

21+
.. _whatsnew_233.string_fixes.improvements:
22+
23+
Improvements
24+
^^^^^^^^^^^^
25+
- Update :meth:`DataFrame.select_dtypes` to keep selecting ``str`` columns when
26+
specifying ``include=["object"]`` for backwards compatibility. In a future
27+
release, this will be deprecated and code for pandas 3+ should be updated to
28+
do ``include=["str"]`` (:issue:`61916`)
29+
30+
2131
.. _whatsnew_233.string_fixes.bugs:
2232

2333
Bug fixes
2434
^^^^^^^^^
2535
- Fix bug in :meth:`Series.str.replace` where using named capture groups (e.g., ``\g<name>``) with the Arrow-backed dtype would raise an error (:issue:`57636`)
2636
- Fix regression in :meth:`~Series.str.contains`, :meth:`~Series.str.match` and :meth:`~Series.str.fullmatch`
2737
with a compiled regex and custom flags (:issue:`62240`)
38+
- Fix :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
2839

2940
.. ---------------------------------------------------------------------------
3041
.. _whatsnew_233.contributors:

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ Other enhancements
226226
- Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
227227
- Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
228228
- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
229+
- Switched wheel upload to **PyPI Trusted Publishing** (OIDC) in ``wheels.yml``, triggered when a GitHub release is published for a ``v*`` tag (:issue:`61718`)
229230
-
230231

231232
.. ---------------------------------------------------------------------------

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -326,8 +326,12 @@ def _str_fullmatch(
326326
flags: int = 0,
327327
na: Scalar | lib.NoDefault = lib.no_default,
328328
):
329-
if not pat.endswith("$") or pat.endswith("\\$"):
330-
pat = f"{pat}$"
329+
if (not pat.endswith("$") or pat.endswith("\\$")) and not pat.startswith("^"):
330+
pat = f"^({pat})$"
331+
elif not pat.endswith("$") or pat.endswith("\\$"):
332+
pat = f"^({pat[1:]})$"
333+
elif not pat.startswith("^"):
334+
pat = f"^({pat[0:-1]})$"
331335
return self._str_match(pat, case, flags, na)
332336

333337
def _str_find(self, sub: str, start: int = 0, end: int | None = None):

pandas/core/dtypes/cast.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,9 @@ def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None:
865865
np.dtype("<U").type, # type: ignore[arg-type]
866866
}
867867
if non_string_dtypes != dtype_set:
868-
raise TypeError("string dtypes are not allowed, use 'object' instead")
868+
raise TypeError(
869+
"numpy string dtypes are not allowed, use 'str' or 'object' instead"
870+
)
869871

870872

871873
def coerce_indexer_dtype(indexer, categories) -> np.ndarray:

pandas/core/frame.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5150,10 +5150,14 @@ def check_int_infer_dtype(dtypes):
51505150
def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
51515151
# GH 46870: BooleanDtype._is_numeric == True but should be excluded
51525152
dtype = dtype if not isinstance(dtype, ArrowDtype) else dtype.numpy_dtype
5153-
return issubclass(dtype.type, tuple(dtypes_set)) or (
5154-
np.number in dtypes_set
5155-
and getattr(dtype, "_is_numeric", False)
5156-
and not is_bool_dtype(dtype)
5153+
return (
5154+
issubclass(dtype.type, tuple(dtypes_set))
5155+
or (
5156+
np.number in dtypes_set
5157+
and getattr(dtype, "_is_numeric", False)
5158+
and not is_bool_dtype(dtype)
5159+
)
5160+
or (dtype.type is str and np.object_ in dtypes_set)
51575161
)
51585162

51595163
def predicate(arr: ArrayLike) -> bool:

pandas/tests/extension/test_arrow.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1870,23 +1870,28 @@ def test_str_match(pat, case, na, exp):
18701870

18711871
@pytest.mark.parametrize(
18721872
"pat, case, na, exp",
1873+
# Note: keep cases in sync with
1874+
# pandas/tests/strings/test_find_replace.py::test_str_fullmatch_extra_cases
18731875
[
1874-
["abc", False, None, [True, True, False, None]],
1876+
["abc", False, None, [True, False, False, None]],
18751877
["Abc", True, None, [False, False, False, None]],
18761878
["bc", True, None, [False, False, False, None]],
1877-
["ab", False, None, [True, True, False, None]],
1878-
["a[a-z]{2}", False, None, [True, True, False, None]],
1879+
["ab", False, None, [False, False, False, None]],
1880+
["a[a-z]{2}", False, None, [True, False, False, None]],
18791881
["A[a-z]{1}", True, None, [False, False, False, None]],
18801882
# GH Issue: #56652
18811883
["abc$", False, None, [True, False, False, None]],
18821884
["abc\\$", False, None, [False, True, False, None]],
18831885
["Abc$", True, None, [False, False, False, None]],
18841886
["Abc\\$", True, None, [False, False, False, None]],
1887+
# https://github.com/pandas-dev/pandas/issues/61072
1888+
["(abc)|(abx)", True, None, [True, False, False, None]],
1889+
["((abc)|(abx))", True, None, [True, False, False, None]],
18851890
],
18861891
)
18871892
def test_str_fullmatch(pat, case, na, exp):
18881893
ser = pd.Series(["abc", "abc$", "$abc", None], dtype=ArrowDtype(pa.string()))
1889-
result = ser.str.match(pat, case=case, na=na)
1894+
result = ser.str.fullmatch(pat, case=case, na=na)
18901895
expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
18911896
tm.assert_series_equal(result, expected)
18921897

pandas/tests/frame/methods/test_select_dtypes.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ def test_select_dtypes_include_using_list_like(self, using_infer_string):
102102
ri = df.select_dtypes(include=[str])
103103
tm.assert_frame_equal(ri, ei)
104104

105+
ri = df.select_dtypes(include=["object"])
106+
ei = df[["a"]]
107+
tm.assert_frame_equal(ri, ei)
108+
105109
def test_select_dtypes_exclude_using_list_like(self):
106110
df = DataFrame(
107111
{
@@ -309,17 +313,15 @@ def test_select_dtypes_not_an_attr_but_still_valid_dtype(self, using_infer_strin
309313
df["g"] = df.f.diff()
310314
assert not hasattr(np, "u8")
311315
r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
312-
if using_infer_string:
313-
e = df[["b"]]
314-
else:
315-
e = df[["a", "b"]]
316+
# if using_infer_string:
317+
# TODO warn
318+
e = df[["a", "b"]]
316319
tm.assert_frame_equal(r, e)
317320

318321
r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
319-
if using_infer_string:
320-
e = df[["b", "g"]]
321-
else:
322-
e = df[["a", "b", "g"]]
322+
# if using_infer_string:
323+
# TODO warn
324+
e = df[["a", "b", "g"]]
323325
tm.assert_frame_equal(r, e)
324326

325327
def test_select_dtypes_empty(self):

pandas/tests/strings/test_find_replace.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,6 +1075,44 @@ def test_fullmatch_compiled_regex(any_string_dtype):
10751075
values.str.fullmatch(re.compile("ab"), flags=re.IGNORECASE)
10761076

10771077

1078+
@pytest.mark.parametrize(
    "pat, case, na, exp",
    # Note: keep cases in sync with
    # pandas/tests/extension/test_arrow.py::test_str_fullmatch
    [
        ["abc", False, None, [True, False, False, None]],
        ["Abc", True, None, [False, False, False, None]],
        ["bc", True, None, [False, False, False, None]],
        ["ab", False, None, [False, False, False, None]],
        ["a[a-z]{2}", False, None, [True, False, False, None]],
        ["A[a-z]{1}", True, None, [False, False, False, None]],
        # GH Issue: #56652
        ["abc$", False, None, [True, False, False, None]],
        ["abc\\$", False, None, [False, True, False, None]],
        ["Abc$", True, None, [False, False, False, None]],
        ["Abc\\$", True, None, [False, False, False, None]],
        # https://github.com/pandas-dev/pandas/issues/61072
        ["(abc)|(abx)", True, None, [True, False, False, None]],
        ["((abc)|(abx))", True, None, [True, False, False, None]],
    ],
)
def test_str_fullmatch_extra_cases(any_string_dtype, pat, case, na, exp):
    """Check ``Series.str.fullmatch`` for each string dtype fixture.

    The cases cover plain, ``$``-anchored, escaped-``$`` and grouped patterns
    against the fixed input ``["abc", "abc$", "$abc", None]``.
    """
    ser = Series(["abc", "abc$", "$abc", None], dtype=any_string_dtype)
    result = ser.str.fullmatch(pat, case=case, na=na)

    if any_string_dtype == "str":
        # NaN propagates as False
        # Build a new list rather than assigning to ``exp[-1]``: pytest hands
        # the same list object to every run of this case, so an in-place
        # change would leak into the runs for the other string dtypes.
        exp = [*exp[:-1], False]
        expected_dtype = bool
    else:
        expected_dtype = (
            "object" if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
        )
    expected = Series(exp, dtype=expected_dtype)
    tm.assert_series_equal(result, expected)
1114+
1115+
10781116
# --------------------------------------------------------------------------------------
10791117
# str.findall
10801118
# --------------------------------------------------------------------------------------

0 commit comments

Comments
 (0)