Skip to content
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.3.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ Bug fixes
"string" type in the JSON Table Schema for :class:`StringDtype` columns
(:issue:`61889`)
- Boolean operations (``|``, ``&``, ``^``) with bool-dtype objects on the left and :class:`StringDtype` objects on the right now cast the string to bool, with a deprecation warning (:issue:`60234`)
- Fixed :meth:`~Series.str.match` and :meth:`~Series.str.fullmatch` with compiled regex
for the Arrow-backed string dtype (:issue:`61964`)

.. ---------------------------------------------------------------------------
.. _whatsnew_232.contributors:
Expand Down
14 changes: 10 additions & 4 deletions pandas/core/arrays/_arrow_string_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,23 +302,29 @@ def _str_contains(

def _str_match(
    self,
    pat: str | re.Pattern,
    case: bool = True,
    flags: int = 0,
    na: Scalar | lib.NoDefault = lib.no_default,
):
    """
    Match ``pat`` against the start of each element.

    The pattern is anchored with a leading ``^`` (unless it already starts
    with one) and the work is delegated to ``self._str_contains`` with
    ``regex=True``.

    Parameters
    ----------
    pat : str or compiled regex
        Regular expression. A compiled pattern is reduced to its source
        string before being forwarded (GH#61952).
    case : bool, default True
        If True, case sensitive. Forced to False when ``pat`` is a compiled
        pattern carrying ``re.IGNORECASE``.
    flags : int, default 0
        Forwarded unchanged to ``_str_contains``.
    na : scalar, optional
        Forwarded unchanged to ``_str_contains``.

    Returns
    -------
    The result of ``self._str_contains`` for the anchored pattern.
    """
    if isinstance(pat, re.Pattern):
        # GH#61952
        # Only the source string is forwarded, so a compiled IGNORECASE flag
        # would otherwise be lost; express it through ``case`` instead.
        if pat.flags & re.IGNORECASE:
            case = False
        # NOTE(review): other compiled flags (re.MULTILINE, re.DOTALL, ...)
        # are still dropped here -- confirm whether they should be forwarded
        # or rejected.
        pat = pat.pattern
    if isinstance(pat, str) and not pat.startswith("^"):
        pat = f"^{pat}"
    return self._str_contains(pat, case, flags, na, regex=True)

def _str_fullmatch(
    self,
    pat: str | re.Pattern,
    case: bool = True,
    flags: int = 0,
    na: Scalar | lib.NoDefault = lib.no_default,
):
    """
    Match ``pat`` against the whole of each element.

    A trailing ``$`` is appended to the pattern when needed and the work is
    delegated to ``self._str_match``.

    Parameters
    ----------
    pat : str or compiled regex
        Regular expression. A compiled pattern is reduced to its source
        string before being forwarded (GH#61952).
    case : bool, default True
        If True, case sensitive. Forced to False when ``pat`` is a compiled
        pattern carrying ``re.IGNORECASE``.
    flags : int, default 0
        Forwarded unchanged to ``_str_match``.
    na : scalar, optional
        Forwarded unchanged to ``_str_match``.

    Returns
    -------
    The result of ``self._str_match`` for the end-anchored pattern.
    """
    if isinstance(pat, re.Pattern):
        # GH#61952
        # Only the source string is forwarded, so a compiled IGNORECASE flag
        # would otherwise be lost; express it through ``case`` instead.
        if pat.flags & re.IGNORECASE:
            case = False
        # NOTE(review): other compiled flags (re.MULTILINE, re.DOTALL, ...)
        # are still dropped here -- confirm whether they should be forwarded
        # or rejected.
        pat = pat.pattern
    # Append the anchor when the pattern does not end in "$", and also when it
    # ends in an escaped "\$" (a literal dollar sign, not an anchor).
    if isinstance(pat, str) and (not pat.endswith("$") or pat.endswith("\\$")):
        pat = f"{pat}$"
    return self._str_match(pat, case, flags, na)

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,8 +1361,8 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na=lib.no_default):

Parameters
----------
pat : str
Character sequence.
pat : str or compiled regex
Character sequence or regular expression.
case : bool, default True
If True, case sensitive.
flags : int, default 0 (no flags)
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/strings/object_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,14 +248,15 @@ def rep(x, r):

def _str_match(
self,
pat: str,
pat: str | re.Pattern,
case: bool = True,
flags: int = 0,
na: Scalar | lib.NoDefault = lib.no_default,
):
if not case:
flags |= re.IGNORECASE

if isinstance(pat, re.Pattern):
pat = pat.pattern
regex = re.compile(pat, flags=flags)

f = lambda x: regex.match(x) is not None
Expand All @@ -270,7 +271,8 @@ def _str_fullmatch(
):
if not case:
flags |= re.IGNORECASE

if isinstance(pat, re.Pattern):
pat = pat.pattern
regex = re.compile(pat, flags=flags)

f = lambda x: regex.fullmatch(x) is not None
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,17 @@ def test_match_case_kwarg(any_string_dtype):
tm.assert_series_equal(result, expected)


def test_match_compiled_regex(any_string_dtype):
    # GH#61952
    # str.match must accept a pre-compiled pattern for every string dtype
    ser = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)
    compiled = re.compile(r"ab")
    result = ser.str.match(compiled, case=False)

    # object / NaN-backed string dtypes give numpy bools, the rest nullable
    if is_object_or_nan_string_dtype(any_string_dtype):
        bool_dtype = np.bool_
    else:
        bool_dtype = "boolean"

    expected = Series([True] * 4, dtype=bool_dtype)
    tm.assert_series_equal(result, expected)


# --------------------------------------------------------------------------------------
# str.fullmatch
# --------------------------------------------------------------------------------------
Expand Down Expand Up @@ -887,6 +898,17 @@ def test_fullmatch_case_kwarg(any_string_dtype):
tm.assert_series_equal(result, expected)


def test_fullmatch_compiled_regex(any_string_dtype):
    # GH#61952
    # str.fullmatch must accept a pre-compiled pattern for every string dtype
    ser = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)
    compiled = re.compile(r"ab")
    result = ser.str.fullmatch(compiled, case=False)

    # object / NaN-backed string dtypes give numpy bools, the rest nullable
    if is_object_or_nan_string_dtype(any_string_dtype):
        bool_dtype = np.bool_
    else:
        bool_dtype = "boolean"

    # only the two-character entries match in full once case is ignored
    expected = Series([True, True, False, False], dtype=bool_dtype)
    tm.assert_series_equal(result, expected)


# --------------------------------------------------------------------------------------
# str.findall
# --------------------------------------------------------------------------------------
Expand Down
Loading