Skip to content

Commit 0ab10aa

Browse files
authored
TST: Refactor S3 tests (#61703)
* reformat
* update signatures
* cleanup
* change fixture scope
* update s3 test
* use service container for linux CI
* yield instead of return
* remove flask from dep
1 parent 0490e1b commit 0ab10aa

File tree

15 files changed: 223 additions (+223) and 358 deletions (-358)

.github/workflows/unit-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,6 @@ jobs:
140140

141141
moto:
142142
image: motoserver/moto:5.0.27
143-
env:
144-
AWS_ACCESS_KEY_ID: foobar_key
145-
AWS_SECRET_ACCESS_KEY: foobar_secret
146143
ports:
147144
- 5000:5000
148145

environment.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,8 @@ dependencies:
6464
- dask-core
6565
- seaborn-base
6666

67-
# local testing dependencies
67+
# Mocking s3 tests
6868
- moto
69-
- flask
7069

7170
# benchmarks
7271
- asv>=0.6.1

pandas/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2116,3 +2116,9 @@ def temp_file(tmp_path):
21162116
file_path = tmp_path / str(uuid.uuid4())
21172117
file_path.touch()
21182118
return file_path
2119+
2120+
2121+
@pytest.fixture(scope="session")
2122+
def monkeysession():
2123+
with pytest.MonkeyPatch.context() as mp:
2124+
yield mp

pandas/tests/io/conftest.py

Lines changed: 60 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
import shlex
2-
import subprocess
3-
import time
41
import uuid
52

63
import pytest
@@ -50,93 +47,77 @@ def xml_file(datapath):
5047
return datapath("io", "data", "xml", "books.xml")
5148

5249

53-
@pytest.fixture
54-
def s3_base(worker_id, monkeypatch):
55-
"""
56-
Fixture for mocking S3 interaction.
50+
@pytest.fixture(scope="session")
51+
def aws_credentials(monkeysession):
52+
"""Mocked AWS Credentials for moto."""
53+
monkeysession.setenv("AWS_ACCESS_KEY_ID", "testing")
54+
monkeysession.setenv("AWS_SECRET_ACCESS_KEY", "testing")
55+
monkeysession.setenv("AWS_SECURITY_TOKEN", "testing")
56+
monkeysession.setenv("AWS_SESSION_AWS_SESSION_TOKEN", "testing")
57+
monkeysession.setenv("AWS_DEFAULT_REGION", "us-east-1")
5758

58-
Sets up moto server in separate process locally
59-
Return url for motoserver/moto CI service
60-
"""
61-
pytest.importorskip("s3fs")
62-
pytest.importorskip("boto3")
63-
64-
# temporary workaround as moto fails for botocore >= 1.11 otherwise,
65-
# see https://github.com/spulec/moto/issues/1924 & 1952
66-
monkeypatch.setenv("AWS_ACCESS_KEY_ID", "foobar_key")
67-
monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "foobar_secret")
68-
if is_ci_environment():
69-
if is_platform_arm() or is_platform_mac() or is_platform_windows():
70-
# NOT RUN on Windows/macOS, only Ubuntu
71-
# - subprocess in CI can cause timeouts
72-
# - GitHub Actions do not support
73-
# container services for the above OSs
74-
pytest.skip(
75-
"S3 tests do not have a corresponding service on "
76-
"Windows or macOS platforms"
77-
)
78-
else:
79-
# set in .github/workflows/unit-tests.yml
80-
yield "http://localhost:5000"
59+
60+
@pytest.fixture(scope="session")
61+
def moto_server(aws_credentials):
62+
# use service container for Linux on GitHub Actions
63+
if is_ci_environment() and not (
64+
is_platform_mac() or is_platform_arm() or is_platform_windows()
65+
):
66+
yield "http://localhost:5000"
8167
else:
82-
requests = pytest.importorskip("requests")
83-
pytest.importorskip("moto")
84-
pytest.importorskip("flask") # server mode needs flask too
85-
86-
# Launching moto in server mode, i.e., as a separate process
87-
# with an S3 endpoint on localhost
88-
89-
worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw")
90-
endpoint_port = f"555{worker_id}"
91-
endpoint_uri = f"http://127.0.0.1:{endpoint_port}/"
92-
93-
# pipe to null to avoid logging in terminal
94-
with subprocess.Popen(
95-
shlex.split(f"moto_server s3 -p {endpoint_port}"),
96-
stdout=subprocess.DEVNULL,
97-
stderr=subprocess.DEVNULL,
98-
) as proc:
99-
timeout = 5
100-
while timeout > 0:
101-
try:
102-
# OK to go once server is accepting connections
103-
r = requests.get(endpoint_uri)
104-
if r.ok:
105-
break
106-
except Exception:
107-
pass
108-
timeout -= 0.1
109-
time.sleep(0.1)
110-
yield endpoint_uri
111-
112-
proc.terminate()
68+
moto_server = pytest.importorskip("moto.server")
69+
server = moto_server.ThreadedMotoServer(port=0)
70+
server.start()
71+
host, port = server.get_host_and_port()
72+
yield f"http://{host}:{port}"
73+
server.stop()
11374

11475

11576
@pytest.fixture
116-
def s3so(s3_base):
117-
return {"client_kwargs": {"endpoint_url": s3_base}}
77+
def moto_s3_resource(moto_server):
78+
boto3 = pytest.importorskip("boto3")
79+
s3 = boto3.resource("s3", endpoint_url=moto_server)
80+
return s3
11881

11982

120-
@pytest.fixture
121-
def s3_resource(s3_base):
122-
import boto3
83+
@pytest.fixture(scope="session")
84+
def s3so(moto_server):
85+
return {
86+
"client_kwargs": {
87+
"endpoint_url": moto_server,
88+
}
89+
}
12390

124-
s3 = boto3.resource("s3", endpoint_url=s3_base)
125-
return s3
91+
92+
@pytest.fixture
93+
def s3_bucket_public(moto_s3_resource):
94+
"""
95+
Create a public S3 bucket using moto.
96+
"""
97+
bucket_name = f"pandas-test-{uuid.uuid4()}"
98+
bucket = moto_s3_resource.Bucket(bucket_name)
99+
bucket.create(ACL="public-read")
100+
yield bucket
101+
bucket.objects.delete()
102+
bucket.delete()
126103

127104

128105
@pytest.fixture
129-
def s3_public_bucket(s3_resource):
130-
bucket = s3_resource.Bucket(f"pandas-test-{uuid.uuid4()}")
131-
bucket.create()
106+
def s3_bucket_private(moto_s3_resource):
107+
"""
108+
Create a private S3 bucket using moto.
109+
"""
110+
bucket_name = f"cant_get_it-{uuid.uuid4()}"
111+
bucket = moto_s3_resource.Bucket(bucket_name)
112+
bucket.create(ACL="private")
132113
yield bucket
133114
bucket.objects.delete()
134115
bucket.delete()
135116

136117

137118
@pytest.fixture
138-
def s3_public_bucket_with_data(
139-
s3_public_bucket, tips_file, jsonl_file, feather_file, xml_file
119+
def s3_bucket_public_with_data(
120+
s3_bucket_public, tips_file, jsonl_file, feather_file, xml_file
140121
):
141122
"""
142123
The following datasets
@@ -158,22 +139,13 @@ def s3_public_bucket_with_data(
158139
]
159140
for s3_key, file_name in test_s3_files:
160141
with open(file_name, "rb") as f:
161-
s3_public_bucket.put_object(Key=s3_key, Body=f)
162-
return s3_public_bucket
163-
164-
165-
@pytest.fixture
166-
def s3_private_bucket(s3_resource):
167-
bucket = s3_resource.Bucket(f"cant_get_it-{uuid.uuid4()}")
168-
bucket.create(ACL="private")
169-
yield bucket
170-
bucket.objects.delete()
171-
bucket.delete()
142+
s3_bucket_public.put_object(Key=s3_key, Body=f)
143+
return s3_bucket_public
172144

173145

174146
@pytest.fixture
175-
def s3_private_bucket_with_data(
176-
s3_private_bucket, tips_file, jsonl_file, feather_file, xml_file
147+
def s3_bucket_private_with_data(
148+
s3_bucket_private, tips_file, jsonl_file, feather_file, xml_file
177149
):
178150
"""
179151
The following datasets
@@ -195,8 +167,8 @@ def s3_private_bucket_with_data(
195167
]
196168
for s3_key, file_name in test_s3_files:
197169
with open(file_name, "rb") as f:
198-
s3_private_bucket.put_object(Key=s3_key, Body=f)
199-
return s3_private_bucket
170+
s3_bucket_private.put_object(Key=s3_key, Body=f)
171+
return s3_bucket_private
200172

201173

202174
_compression_formats_params = [

pandas/tests/io/excel/test_readers.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -934,29 +934,27 @@ def test_read_from_http_url(self, httpserver, read_ext):
934934

935935
@td.skip_if_not_us_locale
936936
@pytest.mark.single_cpu
937-
def test_read_from_s3_url(self, read_ext, s3_public_bucket, s3so):
938-
# Bucket created in tests/io/conftest.py
937+
def test_read_from_s3_url(self, read_ext, s3_bucket_public, s3so):
939938
with open("test1" + read_ext, "rb") as f:
940-
s3_public_bucket.put_object(Key="test1" + read_ext, Body=f)
939+
s3_bucket_public.put_object(Key="test1" + read_ext, Body=f)
941940

942-
url = f"s3://{s3_public_bucket.name}/test1" + read_ext
941+
url = f"s3://{s3_bucket_public.name}/test1" + read_ext
943942

944943
url_table = pd.read_excel(url, storage_options=s3so)
945944
local_table = pd.read_excel("test1" + read_ext)
946945
tm.assert_frame_equal(url_table, local_table)
947946

948947
@pytest.mark.single_cpu
949-
def test_read_from_s3_object(self, read_ext, s3_public_bucket, s3so):
948+
def test_read_from_s3_object(self, read_ext, s3_bucket_public, s3so):
950949
# GH 38788
951-
# Bucket created in tests/io/conftest.py
952950
with open("test1" + read_ext, "rb") as f:
953-
s3_public_bucket.put_object(Key="test1" + read_ext, Body=f)
951+
s3_bucket_public.put_object(Key="test1" + read_ext, Body=f)
954952

955953
import s3fs
956954

957955
s3 = s3fs.S3FileSystem(**s3so)
958956

959-
with s3.open(f"s3://{s3_public_bucket.name}/test1" + read_ext) as f:
957+
with s3.open(f"s3://{s3_bucket_public.name}/test1" + read_ext) as f:
960958
url_table = pd.read_excel(f)
961959

962960
local_table = pd.read_excel("test1" + read_ext)

pandas/tests/io/excel/test_style.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -318,16 +318,16 @@ def custom_converter(css):
318318

319319
@pytest.mark.single_cpu
320320
@td.skip_if_not_us_locale
321-
def test_styler_to_s3(s3_public_bucket, s3so):
321+
def test_styler_to_s3(s3_bucket_public, s3so):
322322
# GH#46381
323-
324-
mock_bucket_name, target_file = s3_public_bucket.name, "test.xlsx"
323+
mock_bucket_name = s3_bucket_public.name
324+
target_file = f"{uuid.uuid4()}.xlsx"
325325
df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
326326
styler = df.style.set_sticky(axis="index")
327327
styler.to_excel(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so)
328328
timeout = 5
329329
while True:
330-
if target_file in (obj.key for obj in s3_public_bucket.objects.all()):
330+
if target_file in (obj.key for obj in s3_bucket_public.objects.all()):
331331
break
332332
time.sleep(0.1)
333333
timeout -= 0.1

pandas/tests/io/json/test_compression.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
BytesIO,
33
StringIO,
44
)
5+
import uuid
56

67
import pytest
78

@@ -42,17 +43,18 @@ def test_read_zipped_json(datapath):
4243
@td.skip_if_not_us_locale
4344
@pytest.mark.single_cpu
4445
@pytest.mark.network
45-
def test_with_s3_url(compression, s3_public_bucket, s3so):
46+
def test_with_s3_url(compression, s3_bucket_public, s3so):
4647
# Bucket created in tests/io/conftest.py
4748
df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
4849

50+
key = f"{uuid.uuid4()}.json"
4951
with tm.ensure_clean() as path:
5052
df.to_json(path, compression=compression)
5153
with open(path, "rb") as f:
52-
s3_public_bucket.put_object(Key="test-1", Body=f)
54+
s3_bucket_public.put_object(Key=key, Body=f)
5355

5456
roundtripped_df = pd.read_json(
55-
f"s3://{s3_public_bucket.name}/test-1",
57+
f"s3://{s3_bucket_public.name}/{key}",
5658
compression=compression,
5759
storage_options=s3so,
5860
)

pandas/tests/io/json/test_pandas.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import os
99
import sys
1010
import time
11+
import uuid
1112

1213
import numpy as np
1314
import pytest
@@ -1411,11 +1412,10 @@ def test_read_inline_jsonl(self):
14111412
@pytest.mark.single_cpu
14121413
@pytest.mark.network
14131414
@td.skip_if_not_us_locale
1414-
def test_read_s3_jsonl(self, s3_public_bucket_with_data, s3so):
1415+
def test_read_s3_jsonl(self, s3_bucket_public_with_data, s3so):
14151416
# GH17200
1416-
14171417
result = read_json(
1418-
f"s3n://{s3_public_bucket_with_data.name}/items.jsonl",
1418+
f"s3n://{s3_bucket_public_with_data.name}/items.jsonl",
14191419
lines=True,
14201420
storage_options=s3so,
14211421
)
@@ -2011,14 +2011,15 @@ def test_json_multiindex(self):
20112011

20122012
@pytest.mark.single_cpu
20132013
@pytest.mark.network
2014-
def test_to_s3(self, s3_public_bucket, s3so):
2014+
def test_to_s3(self, s3_bucket_public, s3so):
20152015
# GH 28375
2016-
mock_bucket_name, target_file = s3_public_bucket.name, "test.json"
2016+
mock_bucket_name = s3_bucket_public.name
2017+
target_file = f"{uuid.uuid4()}.json"
20172018
df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
20182019
df.to_json(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so)
20192020
timeout = 5
20202021
while True:
2021-
if target_file in (obj.key for obj in s3_public_bucket.objects.all()):
2022+
if target_file in (obj.key for obj in s3_bucket_public.objects.all()):
20222023
break
20232024
time.sleep(0.1)
20242025
timeout -= 0.1

0 commit comments

Comments
 (0)