pandas-dev · adrienpacifico · Jul 17, 2025 · Jul 17, 2025 · Jul 17, 2025 · Jul 17, 2025
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -69,6 +69,7 @@ def concat_compat(
     -------
     a single array, preserving the combined dtypes
     """
+
     if len(to_concat) and lib.dtypes_all_equal([obj.dtype for obj in to_concat]):
         # fastpath!
         obj = to_concat[0]
@@ -92,6 +93,27 @@ def concat_compat(
                 to_concat_eas,
                 axis=axis,  # type: ignore[call-arg]
             )
+    # Special-case all-categorical inputs so the result stays categorical, GH#51362
+    if (
+        len(to_concat)
+        and all(isinstance(arr.dtype, CategoricalDtype) for arr in to_concat)
+        and axis == 0
+    ):
+        # Filter out empty arrays before union, similar to non_empties logic
+        non_empty_categoricals = [x for x in to_concat if _is_nonempty(x, axis)]
+
+        if len(non_empty_categoricals) == 0:
+            # All arrays are empty, return the first one (they're all categorical)
+            return to_concat[0]
+        elif len(non_empty_categoricals) == 1:
+            # Only one non-empty array, return it directly
+            return non_empty_categoricals[0]
+        else:
+            # Multiple non-empty arrays, use union_categoricals
+            return union_categoricals(
+                non_empty_categoricals, sort_categories=True
+            )  # Performance cost, but necessary to keep tests passing.
+            # see test_concat_categorical in tests/reshape/concat/test_append_common.py
 
     # If all arrays are empty, there's nothing to convert, just short-cut to
     # the concatenation, #3121.

diff --git a/pandas/tests/dtypes/test_concat.py b/pandas/tests/dtypes/test_concat.py
@@ -3,7 +3,10 @@
 import pandas.core.dtypes.concat as _concat
 
 import pandas as pd
-from pandas import Series
+from pandas import (
+    DataFrame,
+    Series,
+)
 import pandas._testing as tm
 
 
@@ -14,12 +17,12 @@ def test_concat_mismatched_categoricals_with_empty():
 
     result = _concat.concat_compat([ser1._values, ser2._values])
     expected = pd.concat([ser1, ser2])._values
-    tm.assert_numpy_array_equal(result, expected)
+    tm.assert_categorical_equal(result, expected)
 
 
 def test_concat_single_dataframe_tz_aware():
     # https://github.com/pandas-dev/pandas/issues/25257
-    df = pd.DataFrame(
+    df = DataFrame(
         {"timestamp": [pd.Timestamp("2020-04-08 09:00:00.709949+0000", tz="UTC")]}
     )
     expected = df.copy()
@@ -53,7 +56,7 @@ def test_concat_series_between_empty_and_tzaware_series(using_infer_string):
     ser2 = Series(dtype=float)
 
     result = pd.concat([ser1, ser2], axis=1)
-    expected = pd.DataFrame(
+    expected = DataFrame(
         data=[
             (0.0, None),
         ],
@@ -64,3 +67,21 @@ def test_concat_series_between_empty_and_tzaware_series(using_infer_string):
         dtype=float,
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_concat_categorical_dataframes():
+    # GH#51362 concat of categoricals with different categories stays categorical
+    df = DataFrame({"a": [0, 1]}, dtype="category")
+    df2 = DataFrame({"a": [2, 3]}, dtype="category")
+    result = pd.concat([df, df2], axis=0)
+    expected = DataFrame({"a": [0, 1, 2, 3]}, index=[0, 1, 0, 1], dtype="category")
+    tm.assert_frame_equal(result, expected)
+
+
+def test_concat_categorical_series():
+    # GH#51362 concat of categoricals with different categories stays categorical
+    ser = Series([0, 1], dtype="category")
+    ser2 = Series([2, 3], dtype="category")
+    result = pd.concat([ser, ser2], axis=0)
+    expected = Series([0, 1, 2, 3], index=[0, 1, 0, 1], dtype="category")
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py
@@ -486,15 +486,17 @@ def test_concat_categorical(self):
         s1 = Series([3, 2], dtype="category")
         s2 = Series([2, 1], dtype="category")
 
-        exp = Series([3, 2, 2, 1])
+        exp = Series([3, 2, 2, 1], dtype="category")  # should remain category
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
 
         # completely different categories (same dtype) => not-category
-        s1 = Series([10, 11, np.nan], dtype="category")
-        s2 = Series([np.nan, 1, 3, 2], dtype="category")
+        s1 = Series([10.0, 11.0, np.nan], dtype="category")
+        s2 = Series([np.nan, 1.0, 3.0, 2.0], dtype="category")
 
-        exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype=np.float64)
+        exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype=np.float64).astype(
+            "category"
+        )
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
 
@@ -694,7 +696,7 @@ def test_concat_categorical_empty(self):
 
         s1 = Series([], dtype="category")
         s2 = Series([1, 2], dtype="category")
-        exp = s2.astype(object)
+        exp = s2
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
 
@@ -743,7 +745,13 @@ def test_categorical_concat_append(self):
         df_different_categories = DataFrame({"cats": cat3, "vals": vals3})
 
         res = pd.concat([df, df_different_categories], ignore_index=True)
-        exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]})
+        exp = DataFrame(
+            {
+                "cats": Categorical(list("abab"), categories=["a", "b", "c"]),
+                "vals": [1, 2, 1, 2],
+            }
+        )  # categorical dtype is now preserved; changes the expectation from #37243
+
         tm.assert_frame_equal(res, exp)
 
         res = df._append(df_different_categories, ignore_index=True)

diff --git a/pandas/tests/reshape/concat/test_categorical.py b/pandas/tests/reshape/concat/test_categorical.py
@@ -7,6 +7,7 @@
 import pandas as pd
 from pandas import (
     Categorical,
+    CategoricalIndex,
     DataFrame,
     Series,
 )
@@ -75,13 +76,13 @@ def test_concat_categoricalindex(self):
         # GH 16111, categories that aren't lexsorted
         categories = [9, 0, 1, 2, 3]
 
-        a = Series(1, index=pd.CategoricalIndex([9, 0], categories=categories))
-        b = Series(2, index=pd.CategoricalIndex([0, 1], categories=categories))
-        c = Series(3, index=pd.CategoricalIndex([1, 2], categories=categories))
+        a = Series(1, index=CategoricalIndex([9, 0], categories=categories))
+        b = Series(2, index=CategoricalIndex([0, 1], categories=categories))
+        c = Series(3, index=CategoricalIndex([1, 2], categories=categories))
 
         result = pd.concat([a, b, c], axis=1)
 
-        exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories)
+        exp_idx = CategoricalIndex([9, 0, 1, 2], categories=categories)
         exp = DataFrame(
             {
                 0: [1, 1, np.nan, np.nan],
@@ -99,7 +100,7 @@ def test_categorical_concat_preserve(self):
         s = Series(list("abc"), dtype="category")
         s2 = Series(list("abd"), dtype="category")
 
-        exp = Series(list("abcabd"))
+        exp = Series(list("abcabd"), dtype="category")
         res = pd.concat([s, s2], ignore_index=True)
         tm.assert_series_equal(res, exp)
 
@@ -147,8 +148,8 @@ def test_categorical_index_preserver(self):
         result = pd.concat([df2, df3])
         expected = pd.concat(
             [
-                df2.set_axis(df2.index.astype(object), axis=0),
-                df3.set_axis(df3.index.astype(object), axis=0),
+                df2.set_axis(df2.index.astype("category"), axis=0),
+                df3.set_axis(df3.index.astype("category"), axis=0),
             ]
         )
         tm.assert_frame_equal(result, expected)
@@ -179,7 +180,8 @@ def test_concat_categorical_datetime(self):
 
         result = pd.concat([df1, df2])
         expected = DataFrame(
-            {"x": Series([datetime(2021, 1, 1), datetime(2021, 1, 2)])}
+            {"x": Series([datetime(2021, 1, 1), datetime(2021, 1, 2)])},
+            dtype="category",
         )
 
         tm.assert_equal(result, expected)
@@ -227,15 +229,17 @@ def test_categorical_index_upcast(self):
         b = DataFrame({"foo": [4, 3]}, index=Categorical(["baz", "bar"]))
 
         res = pd.concat([a, b])
-        exp = DataFrame({"foo": [1, 2, 4, 3]}, index=["foo", "bar", "baz", "bar"])
+        exp = DataFrame(
+            {"foo": [1, 2, 4, 3]}, index=Categorical(["foo", "bar", "baz", "bar"])
+        )
 
         tm.assert_equal(res, exp)
 
         a = Series([1, 2], index=Categorical(["foo", "bar"]))
         b = Series([4, 3], index=Categorical(["baz", "bar"]))
 
         res = pd.concat([a, b])
-        exp = Series([1, 2, 4, 3], index=["foo", "bar", "baz", "bar"])
+        exp = Series([1, 2, 4, 3], index=Categorical(["foo", "bar", "baz", "bar"]))
 
         tm.assert_equal(res, exp)
 
@@ -257,9 +261,9 @@ def test_categorical_missing_from_one_frame(self):
     def test_concat_categorical_same_categories_different_order(self):
         # https://github.com/pandas-dev/pandas/issues/24845
 
-        c1 = pd.CategoricalIndex(["a", "a"], categories=["a", "b"], ordered=False)
-        c2 = pd.CategoricalIndex(["b", "b"], categories=["b", "a"], ordered=False)
-        c3 = pd.CategoricalIndex(
+        c1 = CategoricalIndex(["a", "a"], categories=["a", "b"], ordered=False)
+        c2 = CategoricalIndex(["b", "b"], categories=["b", "a"], ordered=False)
+        c3 = CategoricalIndex(
             ["a", "a", "b", "b"], categories=["a", "b"], ordered=False
         )