Skip to content

Commit

Permalink
REFACTOR-modin-project#7285: Remove deprecated configs
Browse files Browse the repository at this point in the history
Signed-off-by: Igoshev, Iaroslav <[email protected]>
  • Loading branch information
YarShev committed May 21, 2024
1 parent 43eff5b commit 8a24845
Show file tree
Hide file tree
Showing 8 changed files with 17 additions and 277 deletions.
8 changes: 0 additions & 8 deletions modin/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
DocModule,
Engine,
EnvironmentVariable,
ExperimentalGroupbyImpl,
ExperimentalNumPyAPI,
GithubCI,
GpuCount,
IsDebug,
Expand All @@ -43,7 +41,6 @@
PersistentPickle,
ProgressBar,
RangePartitioning,
RangePartitioningGroupby,
RayInitCustomResources,
RayRedisAddress,
RayRedisPassword,
Expand All @@ -54,7 +51,6 @@
TestReadFromPostgres,
TestReadFromSqlServer,
TrackFileLeaks,
use_range_partitioning_groupby,
)
from modin.config.pubsub import Parameter, ValueSource, context

Expand Down Expand Up @@ -91,11 +87,7 @@
"BenchmarkMode",
"PersistentPickle",
"ModinNumpy",
"ExperimentalNumPyAPI",
"RangePartitioningGroupby",
"RangePartitioning",
"use_range_partitioning_groupby",
"ExperimentalGroupbyImpl",
"AsyncReadMode",
"ReadSqlEngine",
"IsExperimental",
Expand Down
96 changes: 1 addition & 95 deletions modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,40 +683,12 @@ class GithubCI(EnvironmentVariable, type=bool):
default = False


class ModinNumpy(EnvWithSibilings, type=bool):
class ModinNumpy(EnvironmentVariable, type=bool):
"""Set to true to use Modin's implementation of NumPy API."""

varname = "MODIN_NUMPY"
default = False

@classmethod
def _sibling(cls) -> type[EnvWithSibilings]:
"""Get a parameter sibling."""
return ExperimentalNumPyAPI


class ExperimentalNumPyAPI(EnvWithSibilings, type=bool):
"""
Set to true to use Modin's implementation of NumPy API.
This parameter is deprecated. Use ``ModinNumpy`` instead.
"""

varname = "MODIN_EXPERIMENTAL_NUMPY_API"
default = False

@classmethod
def _sibling(cls) -> type[EnvWithSibilings]:
"""Get a parameter sibling."""
return ModinNumpy


# Let the parameter's handling logic know that this variable is deprecated and that
# we should raise respective warnings
ExperimentalNumPyAPI._deprecation_descriptor = DeprecationDescriptor(
ExperimentalNumPyAPI, ModinNumpy
)


class RangePartitioning(EnvironmentVariable, type=bool):
"""
Expand All @@ -730,72 +702,6 @@ class RangePartitioning(EnvironmentVariable, type=bool):
default = False


class RangePartitioningGroupby(EnvWithSibilings, type=bool):
"""
Set to true to use Modin's range-partitioning group by implementation.
This parameter is deprecated. Use ``RangePartitioning`` instead.
"""

varname = "MODIN_RANGE_PARTITIONING_GROUPBY"
default = False

@classmethod
def _sibling(cls) -> type[EnvWithSibilings]:
"""Get a parameter sibling."""
return ExperimentalGroupbyImpl


# Let the parameter's handling logic know that this variable is deprecated and that
# we should raise respective warnings
RangePartitioningGroupby._deprecation_descriptor = DeprecationDescriptor(
RangePartitioningGroupby, RangePartitioning
)


class ExperimentalGroupbyImpl(EnvWithSibilings, type=bool):
"""
Set to true to use Modin's range-partitioning group by implementation.
This parameter is deprecated. Use ``RangePartitioning`` instead.
"""

varname = "MODIN_EXPERIMENTAL_GROUPBY"
default = False

@classmethod
def _sibling(cls) -> type[EnvWithSibilings]:
"""Get a parameter sibling."""
return RangePartitioningGroupby


# Let the parameter's handling logic know that this variable is deprecated and that
# we should raise respective warnings
ExperimentalGroupbyImpl._deprecation_descriptor = DeprecationDescriptor(
ExperimentalGroupbyImpl, RangePartitioningGroupby
)


def use_range_partitioning_groupby() -> bool:
"""
Determine whether range-partitioning implementation for groupby was enabled by a user.
This is a temporary helper function that queries ``RangePartitioning`` and deprecated
``RangePartitioningGroupby`` config variables in order to determine whether to range-part
impl for groupby is enabled. Eventially this function should be removed together with
``RangePartitioningGroupby`` variable.
Returns
-------
bool
"""
with warnings.catch_warnings():
# filter deprecation warning, it was already showed when a user set the variable
warnings.filterwarnings("ignore", category=FutureWarning)
old_range_part_var = RangePartitioningGroupby.get()
return RangePartitioning.get() or old_range_part_var


class CIAWSSecretAccessKey(EnvironmentVariable, type=str):
"""Set to AWS_SECRET_ACCESS_KEY when running mock S3 tests for Modin in GitHub CI."""

Expand Down
4 changes: 2 additions & 2 deletions modin/core/storage_formats/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import pandas
from pandas.core.dtypes.cast import find_common_type

from modin.config import use_range_partitioning_groupby
from modin.config import RangePartitioning
from modin.core.dataframe.algebra import GroupByReduce
from modin.error_message import ErrorMessage
from modin.utils import hashable
Expand Down Expand Up @@ -94,7 +94,7 @@ def build_qc_method(cls, agg_name, finalizer_fn=None):
)

def method(query_compiler, *args, **kwargs):
if use_range_partitioning_groupby():
if RangePartitioning.get():
try:
if finalizer_fn is not None:
raise NotImplementedError(
Expand Down
10 changes: 5 additions & 5 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from pandas.core.indexing import check_bool_indexer
from pandas.errors import DataError

from modin.config import CpuCount, RangePartitioning, use_range_partitioning_groupby
from modin.config import CpuCount, RangePartitioning
from modin.core.dataframe.algebra import (
Binary,
Fold,
Expand Down Expand Up @@ -3625,7 +3625,7 @@ def groupby_nth(
return result

def groupby_mean(self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False):
if use_range_partitioning_groupby():
if RangePartitioning.get():
try:
return self._groupby_shuffle(
by=by,
Expand Down Expand Up @@ -3696,7 +3696,7 @@ def groupby_size(
agg_kwargs,
drop=False,
):
if use_range_partitioning_groupby():
if RangePartitioning.get():
try:
return self._groupby_shuffle(
by=by,
Expand Down Expand Up @@ -4117,7 +4117,7 @@ def groupby_agg(
# 'group_wise' means 'groupby.apply()'. We're certain that range-partitioning groupby
# always works better for '.apply()', so we're using it regardless of the 'RangePartitioning'
# value
if how == "group_wise" or use_range_partitioning_groupby():
if how == "group_wise" or RangePartitioning.get():
try:
return self._groupby_shuffle(
by=by,
Expand All @@ -4138,7 +4138,7 @@ def groupby_agg(
+ "\nFalling back to a full-axis implementation."
)
get_logger().info(message)
if use_range_partitioning_groupby():
if RangePartitioning.get():
ErrorMessage.warn(message)

if isinstance(agg_func, dict) and GroupbyReduceImpl.has_impl_for(agg_func):
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
from pandas.io.parsers import TextFileReader
from pandas.io.parsers.readers import _c_parser_defaults

from modin.config import ExperimentalNumPyAPI
from modin.config import ModinNumpy
from modin.error_message import ErrorMessage
from modin.logging import ClassLogger, enable_logging
from modin.utils import (
Expand Down Expand Up @@ -1146,7 +1146,7 @@ def to_numpy(
if isinstance(modin_obj, SupportsPrivateToNumPy):
return modin_obj._to_numpy()
array = modin_obj.to_numpy()
if ExperimentalNumPyAPI.get():
if ModinNumpy.get():
array = array._to_numpy()
return array

Expand Down
150 changes: 0 additions & 150 deletions modin/tests/config/test_envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
# governing permissions and limitations under the License.

import os
import unittest.mock
import warnings

import pytest

Expand Down Expand Up @@ -121,154 +119,6 @@ def test_ray_cluster_resources():
assert ray.cluster_resources()["special_hardware"] == 1.0


@pytest.mark.parametrize(
"deprecated_var, new_var",
[
(cfg.ExperimentalGroupbyImpl, cfg.RangePartitioning),
(cfg.ExperimentalNumPyAPI, cfg.ModinNumpy),
(cfg.RangePartitioningGroupby, cfg.RangePartitioning),
],
)
def test_deprecated_bool_vars_warnings(deprecated_var, new_var):
"""Test that deprecated parameters are raising `FutureWarnings` and their replacements don't."""
old_depr_val = deprecated_var.get()
old_new_var = new_var.get()

try:
reset_vars(deprecated_var, new_var)
with pytest.warns(FutureWarning):
deprecated_var.get()

with pytest.warns(FutureWarning):
deprecated_var.put(False)

with unittest.mock.patch.dict(os.environ, {deprecated_var.varname: "1"}):
with pytest.warns(FutureWarning):
_check_vars()

# check that the new var doesn't raise any warnings
reset_vars(deprecated_var, new_var)
with warnings.catch_warnings():
warnings.simplefilter("error")
new_var.get()
new_var.put(False)
with unittest.mock.patch.dict(os.environ, {new_var.varname: "1"}):
_check_vars()
finally:
deprecated_var.put(old_depr_val)
new_var.put(old_new_var)


@pytest.mark.parametrize(
"deprecated_var, new_var",
[
(cfg.ExperimentalGroupbyImpl, cfg.RangePartitioningGroupby),
(cfg.ExperimentalNumPyAPI, cfg.ModinNumpy),
],
)
@pytest.mark.parametrize("get_depr_first", [True, False])
def test_deprecated_bool_vars_equals(deprecated_var, new_var, get_depr_first):
"""
Test that deprecated parameters always have values equal to the new replacement parameters.
Parameters
----------
deprecated_var : EnvironmentVariable
new_var : EnvironmentVariable
get_depr_first : bool
Defines an order in which the ``.get()`` method should be called when comparing values.
If ``True``: get deprecated value at first ``deprecated_var.get() == new_var.get() == value``.
If ``False``: get new value at first ``new_var.get() == deprecated_var.get() == value``.
The logic of the ``.get()`` method depends on which parameter was initialized first,
that's why it's worth testing both cases.
"""
old_depr_val = deprecated_var.get()
old_new_var = new_var.get()

def get_values():
return (
(deprecated_var.get(), new_var.get())
if get_depr_first
else (new_var.get(), deprecated_var.get())
)

try:
# case1: initializing the value using 'deprecated_var'
reset_vars(deprecated_var, new_var)
deprecated_var.put(True)
val1, val2 = get_values()
assert val1 == val2 == True # noqa: E712 ('obj == True' comparison)

new_var.put(False)
val1, val2 = get_values()
assert val1 == val2 == False # noqa: E712 ('obj == False' comparison)

new_var.put(True)
val1, val2 = get_values()
assert val1 == val2 == True # noqa: E712 ('obj == True' comparison)

deprecated_var.put(False)
val1, val2 = get_values()
assert val1 == val2 == False # noqa: E712 ('obj == False' comparison)

# case2: initializing the value using 'new_var'
reset_vars(deprecated_var, new_var)
new_var.put(True)
val1, val2 = get_values()
assert val1 == val2 == True # noqa: E712 ('obj == True' comparison)

deprecated_var.put(False)
val1, val2 = get_values()
assert val1 == val2 == False # noqa: E712 ('obj == False' comparison)

deprecated_var.put(True)
val1, val2 = get_values()
assert val1 == val2 == True # noqa: E712 ('obj == True' comparison)

new_var.put(False)
val1, val2 = get_values()
assert val1 == val2 == False # noqa: E712 ('obj == False' comparison)

# case3: initializing the value using 'deprecated_var' with env variable
reset_vars(deprecated_var, new_var)
with unittest.mock.patch.dict(os.environ, {deprecated_var.varname: "True"}):
val1, val2 = get_values()
assert val1 == val2 == True # noqa: E712 ('obj == True' comparison)

new_var.put(False)
val1, val2 = get_values()
assert val1 == val2 == False # noqa: E712 ('obj == False' comparison)

new_var.put(True)
val1, val2 = get_values()
assert val1 == val2 == True # noqa: E712 ('obj == True' comparison)

deprecated_var.put(False)
val1, val2 = get_values()
assert val1 == val2 == False # noqa: E712 ('obj == False' comparison)

# case4: initializing the value using 'new_var' with env variable
reset_vars(deprecated_var, new_var)
with unittest.mock.patch.dict(os.environ, {new_var.varname: "True"}):
val1, val2 = get_values()
assert val1 == val2 == True # noqa: E712 ('obj == True' comparison)

deprecated_var.put(False)
val1, val2 = get_values()
assert val1 == val2 == False # noqa: E712 ('obj == False' comparison)

deprecated_var.put(True)
val1, val2 = get_values()
assert val1 == val2 == True # noqa: E712 ('obj == True' comparison)

new_var.put(False)
val1, val2 = get_values()
assert val1 == val2 == False # noqa: E712 ('obj == False' comparison)
finally:
deprecated_var.put(old_depr_val)
new_var.put(old_new_var)


@pytest.mark.parametrize(
"modify_config",
[{cfg.RangePartitioning: False, cfg.LazyExecution: "Auto"}],
Expand Down
Loading

0 comments on commit 8a24845

Please sign in to comment.