Merge branch 'main' into fix-get-level-values-special-names

vgnshiyer · web-flow · commit acb0b857de6d · 2025-09-22T02:38:39.000+05:30
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
@@ -13,6 +13,8 @@
 name: Wheel builder
 
 on:
+  release:
+    types: [published]
   schedule:
   # 3:27 UTC every day
   - cron: "27 3 * * *"
@@ -216,3 +218,41 @@ jobs:
           source ci/upload_wheels.sh
           set_upload_vars
           upload_wheels
+
+  publish:
+    if: >
+      github.repository == 'pandas-dev/pandas' &&
+      github.event_name == 'release' &&
+      startsWith(github.ref, 'refs/tags/v')
+
+    needs:
+      - build_sdist
+      - build_wheels
+
+    runs-on: ubuntu-latest
+
+    environment:
+      name: pypi
+    permissions:
+      id-token: write         # OIDC for Trusted Publishing
+      contents: read
+
+    steps:
+      - name: Download all artefacts
+        uses: actions/download-artifact@v4
+        with:
+          path: dist          # everything lands in ./dist/**
+
+      - name: Collect files
+        run: |
+          mkdir -p upload
+          # skip any wheel that contains 'pyodide'
+          find dist -name '*pyodide*.whl' -prune -o \
+                    -name '*.whl'   -exec mv {} upload/ \;
+          find dist -name '*.tar.gz' -exec mv {} upload/ \;
+
+      - name: Publish to **PyPI** (Trusted Publishing)
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: upload
+          skip-existing: true
diff --git a/doc/source/development/maintaining.rst b/doc/source/development/maintaining.rst
@@ -451,9 +451,10 @@ which will be triggered when the tag is pushed.
    - Set as the latest release: Leave checked, unless releasing a patch release for an older version
      (e.g. releasing 1.4.5 after 1.5 has been released)
 
-5. Upload wheels to PyPI::
-
-    twine upload pandas/dist/pandas-<version>*.{whl,tar.gz} --skip-existing
+5. Verify wheels are uploaded automatically by GitHub Actions
+   via `Trusted Publishing <https://docs.pypi.org/trusted-publishers/>`__
+   when the GitHub `Release <https://docs.github.com/en/repositories/releasing-projects-on-github/about-releases>`__
+   is published. Do not run ``twine upload`` manually.
 
 6. The GitHub release will after some hours trigger an
    `automated conda-forge PR <https://github.com/conda-forge/pandas-feedstock/pulls>`_.
diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst
@@ -18,13 +18,24 @@ Most changes in this release are related to :class:`StringDtype` which will
 become the default string dtype in pandas 3.0. See
 :ref:`whatsnew_230.upcoming_changes` for more details.
 
+.. _whatsnew_233.string_fixes.improvements:
+
+Improvements
+^^^^^^^^^^^^
+- Update :meth:`DataFrame.select_dtypes` to keep selecting ``str`` columns when
+  specifying ``include=["object"]`` for backwards compatibility. In a future
+  release, this will be deprecated and code for pandas 3+ should be updated to
+  do ``include=["str"]`` (:issue:`61916`)
+
+
 .. _whatsnew_233.string_fixes.bugs:
 
 Bug fixes
 ^^^^^^^^^
 - Fix bug in :meth:`Series.str.replace` where using named capture groups (e.g., ``\g<name>``) with the Arrow-backed dtype would raise an error (:issue:`57636`)
 - Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch``
   with a compiled regex and custom flags (:issue:`62240`)
+- Fix :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_233.contributors:
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -226,6 +226,7 @@ Other enhancements
 - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
+- Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py
@@ -326,8 +326,12 @@ def _str_fullmatch(
         flags: int = 0,
         na: Scalar | lib.NoDefault = lib.no_default,
     ):
-        if not pat.endswith("$") or pat.endswith("\\$"):
-            pat = f"{pat}$"
+        if (not pat.endswith("$") or pat.endswith("\\$")) and not pat.startswith("^"):
+            pat = f"^({pat})$"
+        elif not pat.endswith("$") or pat.endswith("\\$"):
+            pat = f"^({pat[1:]})$"
+        elif not pat.startswith("^"):
+            pat = f"^({pat[0:-1]})$"
         return self._str_match(pat, case, flags, na)
 
     def _str_find(self, sub: str, start: int = 0, end: int | None = None):
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -865,7 +865,9 @@ def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None:
         np.dtype("<U").type,  # type: ignore[arg-type]
     }
     if non_string_dtypes != dtype_set:
-        raise TypeError("string dtypes are not allowed, use 'object' instead")
+        raise TypeError(
+            "numpy string dtypes are not allowed, use 'str' or 'object' instead"
+        )
 
 
 def coerce_indexer_dtype(indexer, categories) -> np.ndarray:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -5150,10 +5150,14 @@ def check_int_infer_dtype(dtypes):
         def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
             # GH 46870: BooleanDtype._is_numeric == True but should be excluded
             dtype = dtype if not isinstance(dtype, ArrowDtype) else dtype.numpy_dtype
-            return issubclass(dtype.type, tuple(dtypes_set)) or (
-                np.number in dtypes_set
-                and getattr(dtype, "_is_numeric", False)
-                and not is_bool_dtype(dtype)
+            return (
+                issubclass(dtype.type, tuple(dtypes_set))
+                or (
+                    np.number in dtypes_set
+                    and getattr(dtype, "_is_numeric", False)
+                    and not is_bool_dtype(dtype)
+                )
+                or (dtype.type is str and np.object_ in dtypes_set)
             )
 
         def predicate(arr: ArrayLike) -> bool:
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -1870,23 +1870,28 @@ def test_str_match(pat, case, na, exp):
 
 @pytest.mark.parametrize(
     "pat, case, na, exp",
+    # Note: keep cases in sync with
+    # pandas/tests/strings/test_find_replace.py::test_str_fullmatch_extra_cases
     [
-        ["abc", False, None, [True, True, False, None]],
+        ["abc", False, None, [True, False, False, None]],
         ["Abc", True, None, [False, False, False, None]],
         ["bc", True, None, [False, False, False, None]],
-        ["ab", False, None, [True, True, False, None]],
-        ["a[a-z]{2}", False, None, [True, True, False, None]],
+        ["ab", False, None, [False, False, False, None]],
+        ["a[a-z]{2}", False, None, [True, False, False, None]],
         ["A[a-z]{1}", True, None, [False, False, False, None]],
         # GH Issue: #56652
         ["abc$", False, None, [True, False, False, None]],
         ["abc\\$", False, None, [False, True, False, None]],
         ["Abc$", True, None, [False, False, False, None]],
         ["Abc\\$", True, None, [False, False, False, None]],
+        # https://github.com/pandas-dev/pandas/issues/61072
+        ["(abc)|(abx)", True, None, [True, False, False, None]],
+        ["((abc)|(abx))", True, None, [True, False, False, None]],
     ],
 )
 def test_str_fullmatch(pat, case, na, exp):
     ser = pd.Series(["abc", "abc$", "$abc", None], dtype=ArrowDtype(pa.string()))
-    result = ser.str.match(pat, case=case, na=na)
+    result = ser.str.fullmatch(pat, case=case, na=na)
     expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
     tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -102,6 +102,10 @@ def test_select_dtypes_include_using_list_like(self, using_infer_string):
             ri = df.select_dtypes(include=[str])
             tm.assert_frame_equal(ri, ei)
 
+        ri = df.select_dtypes(include=["object"])
+        ei = df[["a"]]
+        tm.assert_frame_equal(ri, ei)
+
     def test_select_dtypes_exclude_using_list_like(self):
         df = DataFrame(
             {
@@ -309,17 +313,15 @@ def test_select_dtypes_not_an_attr_but_still_valid_dtype(self, using_infer_strin
         df["g"] = df.f.diff()
         assert not hasattr(np, "u8")
         r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
-        if using_infer_string:
-            e = df[["b"]]
-        else:
-            e = df[["a", "b"]]
+        # if using_infer_string:
+        #     TODO warn
+        e = df[["a", "b"]]
         tm.assert_frame_equal(r, e)
 
         r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
-        if using_infer_string:
-            e = df[["b", "g"]]
-        else:
-            e = df[["a", "b", "g"]]
+        # if using_infer_string:
+        #     TODO warn
+        e = df[["a", "b", "g"]]
         tm.assert_frame_equal(r, e)
 
     def test_select_dtypes_empty(self):
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
@@ -1075,6 +1075,44 @@ def test_fullmatch_compiled_regex(any_string_dtype):
         values.str.fullmatch(re.compile("ab"), flags=re.IGNORECASE)
 
 
+@pytest.mark.parametrize(
+    "pat, case, na, exp",
+    # Note: keep cases in sync with
+    # pandas/tests/extension/test_arrow.py::test_str_fullmatch
+    [
+        ["abc", False, None, [True, False, False, None]],
+        ["Abc", True, None, [False, False, False, None]],
+        ["bc", True, None, [False, False, False, None]],
+        ["ab", False, None, [False, False, False, None]],
+        ["a[a-z]{2}", False, None, [True, False, False, None]],
+        ["A[a-z]{1}", True, None, [False, False, False, None]],
+        # GH Issue: #56652
+        ["abc$", False, None, [True, False, False, None]],
+        ["abc\\$", False, None, [False, True, False, None]],
+        ["Abc$", True, None, [False, False, False, None]],
+        ["Abc\\$", True, None, [False, False, False, None]],
+        # https://github.com/pandas-dev/pandas/issues/61072
+        ["(abc)|(abx)", True, None, [True, False, False, None]],
+        ["((abc)|(abx))", True, None, [True, False, False, None]],
+    ],
+)
+def test_str_fullmatch_extra_cases(any_string_dtype, pat, case, na, exp):
+    ser = Series(["abc", "abc$", "$abc", None], dtype=any_string_dtype)
+    result = ser.str.fullmatch(pat, case=case, na=na)
+
+    if any_string_dtype == "str":
+        # NaN propagates as False
+        exp[-1] = False
+        expected_dtype = bool
+    else:
+        expected_dtype = (
+            "object" if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
+        )
+        expected = Series([True, False, np.nan, False], dtype=expected_dtype)
+    expected = Series(exp, dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+
 # --------------------------------------------------------------------------------------
 # str.findall
 # --------------------------------------------------------------------------------------

Original file line number	Diff line number	Diff line change
`@@ -226,6 +226,7 @@ Other enhancements`
`226`	`226`	- Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
`227`	`227`	- Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
`228`	`228`	- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
	`229`	+- Switched wheel upload to PyPI Trusted Publishing (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`)
`229`	`230`	`-`
`230`	`231`
`231`	`232`	`.. ---------------------------------------------------------------------------`
Original file line number	Diff line number	Diff line change
`@@ -865,7 +865,9 @@ def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None:`
`865`	`865`	`np.dtype("<U").type, # type: ignore[arg-type]`
`866`	`866`	`}`
`867`	`867`	`if non_string_dtypes != dtype_set:`
`868`		`- raise TypeError("string dtypes are not allowed, use 'object' instead")`
	`868`	`+ raise TypeError(`
	`869`	`+ "numpy string dtypes are not allowed, use 'str' or 'object' instead"`
	`870`	`+ )`
`869`	`871`
`870`	`872`
`871`	`873`	`def coerce_indexer_dtype(indexer, categories) -> np.ndarray:`