Address comment

Signed-off-by: Igoshev, Iaroslav <[email protected]>
YarShev · Jun 26, 2024 · f193bd7 · f193bd7
1 parent 8922030
commit f193bd7
Show file tree

Hide file tree

Showing 4 changed files with 54 additions and 2 deletions.
diff --git a/modin/core/dataframe/algebra/binary.py b/modin/core/dataframe/algebra/binary.py
@@ -298,6 +298,7 @@ def register(
         cls,
         func: Callable[..., pandas.DataFrame],
         join_type: str = "outer",
+        sort: bool = None,
         labels: str = "replace",
         infer_dtypes: Optional[str] = None,
     ) -> Callable[..., PandasQueryCompiler]:
@@ -310,6 +311,8 @@ def register(
             Binary function to execute. Have to be able to accept at least two arguments.
         join_type : {'left', 'right', 'outer', 'inner', None}, default: 'outer'
             Type of join that will be used if indices of operands are not aligned.
+        sort : bool, default: None
+            Whether to sort index and columns or not. ``None`` is forwarded as-is,
+            leaving the decision to the underlying frame operation.
         labels : {"keep", "replace", "drop"}, default: "replace"
             Whether keep labels from left Modin DataFrame, replace them with labels
             from joined DataFrame or drop altogether to make them be computed lazily later.
@@ -419,6 +422,7 @@ def caller(
                             lambda x, y: func(x, y, *args, **kwargs),
                             [other._modin_frame],
                             join_type=join_type,
+                            sort=sort,
                             labels=labels,
                             dtypes=dtypes,
                         ),

diff --git a/modin/core/dataframe/pandas/dataframe/dataframe.py b/modin/core/dataframe/pandas/dataframe/dataframe.py
@@ -3823,6 +3823,7 @@ def n_ary_op(
         op,
         right_frames: list[PandasDataframe],
         join_type="outer",
+        sort=None,
         copartition_along_columns=True,
         labels="replace",
         dtypes: Optional[pandas.Series] = None,
@@ -3838,6 +3839,8 @@ def n_ary_op(
             Modin DataFrames to join with.
         join_type : str, default: "outer"
             Type of join to apply.
+        sort : bool, default: None
+            Whether to sort index and columns or not. If None, an axis is sorted
+            only when its labels are not equal across all operands.
         copartition_along_columns : bool, default: True
             Whether to perform copartitioning along columns or not.
             For some ops this isn't needed (e.g., `fillna`).
@@ -3854,7 +3857,16 @@ def n_ary_op(
             New Modin DataFrame.
         """
         left_parts, list_of_right_parts, joined_index, row_lengths = self._copartition(
-            0, right_frames, join_type, sort=True
+            0,
+            right_frames,
+            join_type,
+            sort=(
+                not all(
+                    self.get_axis(0).equals(right.get_axis(0)) for right in right_frames
+                )
+                if sort is None
+                else sort
+            ),
         )
         if copartition_along_columns:
             new_left_frame = self.__constructor__(
@@ -3886,7 +3898,14 @@ def n_ary_op(
                 1,
                 new_right_frames,
                 join_type,
-                sort=False,
+                sort=(
+                    not all(
+                        self.get_axis(1).equals(right.get_axis(1))
+                        for right in new_right_frames
+                    )
+                    if sort is None
+                    else sort
+                ),
             )
         else:
             joined_columns = self.copy_columns_cache(copy_lengths=True)

diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
@@ -460,13 +460,15 @@ def to_numpy(self, **kwargs):
     df_update = Binary.register(
         copy_df_for_func(pandas.DataFrame.update, display_name="update"),
         join_type="left",
+        sort=False,  # opt out of sorting explicitly instead of passing the None default
     )
     series_update = Binary.register(
         copy_df_for_func(
             lambda x, y: pandas.Series.update(x.squeeze(axis=1), y.squeeze(axis=1)),
             display_name="update",
         ),
         join_type="left",
+        sort=False,  # same explicit opt-out as for df_update
     )
 
     # Needed for numpy API

diff --git a/modin/tests/pandas/dataframe/test_binary.py b/modin/tests/pandas/dataframe/test_binary.py
@@ -527,3 +527,30 @@ def test_arithmetic_with_tricky_dtypes(val1, val2, op, request):
         lambda dfs: getattr(dfs[0], op)(dfs[1]),
         expected_exception=expected_exception,
     )
+
+
+@pytest.mark.parametrize(
+    "data, other_data",  # the two operands share only column "B"
+    [  # the second case is the first one with labels "A" and "C" swapped
+        ({"A": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, 5, 6], "C": [7, 8, 9]}),
+        ({"C": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, 5, 6], "A": [7, 8, 9]}),
+    ],
+)
+@pytest.mark.parametrize("axis", [0, 1])  # forwarded to `add(..., axis=axis)`
+@pytest.mark.parametrize("match_index", [True, False])  # only has an effect when axis == 0
+def test_bin_op_mismatched_columns(data, other_data, axis, match_index):  # `add` with partly overlapping columns
+    modin_df, pandas_df = create_test_dfs(data)  # presumably a Modin frame and its pandas twin — confirm helper
+    other_modin_df, other_pandas_df = create_test_dfs(other_data)
+    if axis == 0:
+        if not match_index:  # give both operands the same index labels, but in a different order
+            modin_df.index = pandas_df.index = ["1", "2", "3"]
+            other_modin_df.index = other_pandas_df.index = ["2", "1", "3"]
+    eval_general(  # presumably applies the lambda to both frames and compares results — confirm helper
+        modin_df,
+        pandas_df,
+        lambda df: (
+            df.add(other_modin_df, axis=axis)
+            if isinstance(df, pd.DataFrame)  # `pd` is presumably modin.pandas here — confirm against imports
+            else df.add(other_pandas_df, axis=axis)  # otherwise pair the frame with the pandas operand
+        ),
+    )