Skip to content

Commit

Permalink
Address comment
Browse files Browse the repository at this point in the history
Signed-off-by: Igoshev, Iaroslav <[email protected]>
  • Loading branch information
YarShev committed Jun 26, 2024
1 parent 8922030 commit f193bd7
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 2 deletions.
4 changes: 4 additions & 0 deletions modin/core/dataframe/algebra/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ def register(
cls,
func: Callable[..., pandas.DataFrame],
join_type: str = "outer",
sort: bool = None,
labels: str = "replace",
infer_dtypes: Optional[str] = None,
) -> Callable[..., PandasQueryCompiler]:
Expand All @@ -310,6 +311,8 @@ def register(
Binary function to execute. Have to be able to accept at least two arguments.
join_type : {'left', 'right', 'outer', 'inner', None}, default: 'outer'
Type of join that will be used if indices of operands are not aligned.
sort : bool, default: None
Whether to sort index and columns or not.
labels : {"keep", "replace", "drop"}, default: "replace"
Whether keep labels from left Modin DataFrame, replace them with labels
from joined DataFrame or drop altogether to make them be computed lazily later.
Expand Down Expand Up @@ -419,6 +422,7 @@ def caller(
lambda x, y: func(x, y, *args, **kwargs),
[other._modin_frame],
join_type=join_type,
sort=sort,
labels=labels,
dtypes=dtypes,
),
Expand Down
23 changes: 21 additions & 2 deletions modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3823,6 +3823,7 @@ def n_ary_op(
op,
right_frames: list[PandasDataframe],
join_type="outer",
sort=None,
copartition_along_columns=True,
labels="replace",
dtypes: Optional[pandas.Series] = None,
Expand All @@ -3838,6 +3839,8 @@ def n_ary_op(
Modin DataFrames to join with.
join_type : str, default: "outer"
Type of join to apply.
sort : bool, default: None
Whether to sort index and columns or not.
copartition_along_columns : bool, default: True
Whether to perform copartitioning along columns or not.
For some ops this isn't needed (e.g., `fillna`).
Expand All @@ -3854,7 +3857,16 @@ def n_ary_op(
New Modin DataFrame.
"""
left_parts, list_of_right_parts, joined_index, row_lengths = self._copartition(
0, right_frames, join_type, sort=True
0,
right_frames,
join_type,
sort=(
not all(
self.get_axis(0).equals(right.get_axis(0)) for right in right_frames
)
if sort is None
else sort
),
)
if copartition_along_columns:
new_left_frame = self.__constructor__(
Expand Down Expand Up @@ -3886,7 +3898,14 @@ def n_ary_op(
1,
new_right_frames,
join_type,
sort=False,
sort=(
not all(
self.get_axis(1).equals(right.get_axis(1))
for right in new_right_frames
)
if sort is None
else sort
),
)
else:
joined_columns = self.copy_columns_cache(copy_lengths=True)
Expand Down
2 changes: 2 additions & 0 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,13 +460,15 @@ def to_numpy(self, **kwargs):
df_update = Binary.register(
copy_df_for_func(pandas.DataFrame.update, display_name="update"),
join_type="left",
sort=False,
)
series_update = Binary.register(
copy_df_for_func(
lambda x, y: pandas.Series.update(x.squeeze(axis=1), y.squeeze(axis=1)),
display_name="update",
),
join_type="left",
sort=False,
)

# Needed for numpy API
Expand Down
27 changes: 27 additions & 0 deletions modin/tests/pandas/dataframe/test_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,3 +527,30 @@ def test_arithmetic_with_tricky_dtypes(val1, val2, op, request):
lambda dfs: getattr(dfs[0], op)(dfs[1]),
expected_exception=expected_exception,
)


@pytest.mark.parametrize(
"data, other_data",
[
({"A": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, 5, 6], "C": [7, 8, 9]}),
({"C": [1, 2, 3], "B": [400, 500, 600]}, {"B": [4, 5, 6], "A": [7, 8, 9]}),
],
)
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("match_index", [True, False])
def test_bin_op_mismatched_columns(data, other_data, axis, match_index):
modin_df, pandas_df = create_test_dfs(data)
other_modin_df, other_pandas_df = create_test_dfs(other_data)
if axis == 0:
if not match_index:
modin_df.index = pandas_df.index = ["1", "2", "3"]
other_modin_df.index = other_pandas_df.index = ["2", "1", "3"]
eval_general(
modin_df,
pandas_df,
lambda df: (
df.add(other_modin_df, axis=axis)
if isinstance(df, pd.DataFrame)
else df.add(other_pandas_df, axis=axis)
),
)

0 comments on commit f193bd7

Please sign in to comment.