Skip to content

Commit 7939b0e

Browse files
authored
dependencies: Allow Pandas 2.x (#2210)
* Allow Pandas 2.x * Minor test fixes * Update extension types mappings * Revert "Update extension types mappings" This reverts commit 6fbd99b. * Fix tests * Add warning for Pandas 2.x * Improve warning text
1 parent 23d579e commit 7939b0e

File tree

7 files changed

+38
-13
lines changed

7 files changed

+38
-13
lines changed

awswrangler/pandas/__init__.py

+11
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
"""Pandas "proxy" package."""
2+
import logging
23
from typing import TYPE_CHECKING
34

5+
from packaging import version
6+
from pandas import __version__ as _PANDAS_VERSION
7+
48
from awswrangler._distributed import MemoryFormatEnum, memory_format
59

610
if TYPE_CHECKING or memory_format.get() == MemoryFormatEnum.PANDAS:
@@ -38,6 +42,13 @@
3842
else:
3943
raise ImportError(f"Unknown memory format {memory_format}")
4044

45+
_logger: logging.Logger = logging.getLogger(__name__)
46+
47+
if version.parse(_PANDAS_VERSION) >= version.parse("2.0.0"):
48+
_logger.warning(
49+
"Pandas version 2.x was detected. Please note awswrangler currently does not support pyarrow-backed ArrowDtype "
50+
"DataFrames."
51+
)
4152

4253
__all__ = [
4354
"DataFrame",

poetry.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ python = ">=3.8, <4.0"
3030
# Required
3131
boto3 = "^1.20.32"
3232
botocore = "^1.23.32"
33-
pandas = "^1.2.0, !=1.5.0" # Exclusion per: https://github.com/aws/aws-sdk-pandas/issues/1678
33+
pandas = ">=1.2.0,!=1.5.0,<3.0.0" # Exclusion per: https://github.com/aws/aws-sdk-pandas/issues/1678
3434
numpy = "^1.18"
3535
pyarrow = ">=7.0.0"
3636
typing-extensions = "^4.4.0"

tests/_utils.py

+5
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import boto3
1212
import botocore.exceptions
13+
from packaging import version
1314
from pandas import DataFrame as PandasDataFrame
1415
from pandas import Series as PandasSeries
1516
from pytest import FixtureRequest
@@ -19,6 +20,7 @@
1920
from awswrangler._utils import try_it
2021

2122
is_ray_modin = wr.engine.get() == EngineEnum.RAY and wr.memory_format.get() == MemoryFormatEnum.MODIN
23+
is_pandas_2_x = False
2224

2325
if is_ray_modin:
2426
import modin.pandas as pd
@@ -27,6 +29,9 @@
2729
else:
2830
import pandas as pd
2931

32+
if version.parse(pd.__version__) >= version.parse("2.0.0"):
33+
is_pandas_2_x = True
34+
3035

3136
CFN_VALID_STATUS = ["CREATE_COMPLETE", "ROLLBACK_COMPLETE", "UPDATE_COMPLETE", "UPDATE_ROLLBACK_COMPLETE"]
3237

tests/unit/test_athena.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1386,7 +1386,7 @@ def test_athena_date_recovery(path, glue_database, glue_table):
13861386
"date3": [datetime.date(3099, 1, 3), datetime.date(3099, 1, 4), datetime.date(4080, 1, 5)],
13871387
}
13881388
)
1389-
df["date1"] = df["date1"].astype("datetime64")
1389+
df["date1"] = df["date1"].astype("datetime64[ns]")
13901390
wr.s3.to_parquet(
13911391
df=df,
13921392
path=path,

tests/unit/test_data_quality.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,14 @@ def test_recommendation_ruleset(df, path, name, glue_database, glue_table, glue_
8282
iam_role_arn=glue_data_quality_role,
8383
number_of_workers=2,
8484
)
85-
df_rules = df_recommended_ruleset.append(
86-
{"rule_type": "ColumnValues", "parameter": '"c2"', "expression": "in [0, 1, 2]"}, ignore_index=True
85+
df_rules = pd.concat(
86+
[
87+
df_recommended_ruleset,
88+
pd.DataFrame(
89+
[{"rule_type": "ColumnValues", "parameter": '"c2"', "expression": "in [0, 1, 2]"}],
90+
),
91+
],
92+
ignore_index=True,
8793
)
8894
wr.data_quality.create_ruleset(
8995
name=glue_ruleset,
@@ -179,9 +185,14 @@ def test_update_ruleset(df: pd.DataFrame, glue_database: str, glue_table: str, g
179185
table=glue_table,
180186
df_rules=df_rules,
181187
)
182-
183-
df_rules = df_rules.append(
184-
{"rule_type": "ColumnValues", "parameter": '"c2"', "expression": "in [0, 1, 2]"}, ignore_index=True
188+
df_rules = pd.concat(
189+
[
190+
df_rules,
191+
pd.DataFrame(
192+
[{"rule_type": "ColumnValues", "parameter": '"c2"', "expression": "in [0, 1, 2]"}],
193+
),
194+
],
195+
ignore_index=True,
185196
)
186197

187198
wr.data_quality.update_ruleset(name=glue_ruleset, df_rules=df_rules)

tests/unit/test_s3_text.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,7 @@
2727
def test_csv_encoding(path, encoding, strings, wrong_encoding, exception, line_terminator, chunksize, use_threads):
2828
file_path = f"{path}0.csv"
2929
df = pd.DataFrame({"c0": [1, 2, 3], "c1": strings})
30-
wr.s3.to_csv(
31-
df, file_path, index=False, encoding=encoding, line_terminator=line_terminator, use_threads=use_threads
32-
)
30+
wr.s3.to_csv(df, file_path, index=False, encoding=encoding, lineterminator=line_terminator, use_threads=use_threads)
3331
df2 = wr.s3.read_csv(
3432
file_path, encoding=encoding, lineterminator=line_terminator, use_threads=use_threads, chunksize=chunksize
3533
)
@@ -347,11 +345,11 @@ def test_csv_additional_kwargs(path, kms_key_id, s3_additional_kwargs, use_threa
347345
assert desc.get("ServerSideEncryption") == "AES256"
348346

349347

350-
@pytest.mark.parametrize("line_terminator", ["\n", "\r", "\n\r"])
348+
@pytest.mark.parametrize("line_terminator", ["\n", "\r", "\r\n"])
351349
def test_csv_line_terminator(path, line_terminator):
352350
file_path = f"{path}0.csv"
353351
df = pd.DataFrame(data={"reading": ["col1", "col2"], "timestamp": [1601379427618, 1601379427625], "value": [1, 2]})
354-
wr.s3.to_csv(df=df, path=file_path, index=False, line_terminator=line_terminator)
352+
wr.s3.to_csv(df=df, path=file_path, index=False, lineterminator=line_terminator)
355353
df2 = wr.s3.read_csv(file_path)
356354
assert df.equals(df2)
357355

0 commit comments

Comments (0)