"""Tests for S3 integration with pandas using s3fs."""

import io

import pytest

# Check for required dependencies
try:
    import pandas as pd
    import s3fs

    _ = s3fs  # imported only to prove availability; silences unused-import linters
    HAS_S3_SUPPORT = True
except ImportError:
    HAS_S3_SUPPORT = False

try:
    import boto3
    from moto import mock_aws

    HAS_MOTO = True
except ImportError:
    HAS_MOTO = False


@pytest.mark.skipif(not HAS_S3_SUPPORT, reason="s3fs and pandas not available")
@pytest.mark.skipif(not HAS_MOTO, reason="moto not available")
class TestS3PandasIntegration:
    """Test that pandas can read from S3 when s3fs is installed."""

    def test_s3fs_and_pandas_integration(self):
        """Test that s3fs and pandas work together (simulated)."""
        with mock_aws():
            # Set up mock S3
            bucket = "mock-bucket"
            key = "mock.csv"

            s3_client = boto3.client("s3", region_name="us-east-1")
            s3_client.create_bucket(Bucket=bucket)

            # Upload test CSV
            csv_data = "key,value\nk1,v1\nk2,v2"
            s3_client.put_object(Bucket=bucket, Key=key, Body=csv_data)

            # 1. Use boto3 to get the object (like s3fs would internally)
            response = s3_client.get_object(Bucket=bucket, Key=key)
            csv_content = response["Body"].read().decode("utf-8")

            # 2. Use pandas to read the CSV content (like pandas would)
            df = pd.read_csv(io.StringIO(csv_content))

            # Verify the result
            assert list(df.columns) == ["key", "value"]
            assert df.iloc[0]["key"] == "k1"
            assert df.iloc[1]["key"] == "k2"
            assert df.iloc[0]["value"] == "v1"
            assert df.iloc[1]["value"] == "v2"
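
    # A sketch of an end-to-end read through s3fs itself. The in-process
    # mock_aws() context does not intercept the async HTTP calls made by
    # s3fs/aiobotocore, so this variant assumes a local moto server
    # (moto.server.ThreadedMotoServer) and points both boto3 and pandas at
    # its endpoint. The port number and dummy credentials are arbitrary
    # illustrative choices, not values required by any of these libraries.
    def test_pandas_read_csv_from_s3_url(self, monkeypatch):
        """Read a CSV through pandas' s3:// handling backed by s3fs (sketch)."""
        from moto.server import ThreadedMotoServer

        server = ThreadedMotoServer(port=5555)
        server.start()
        try:
            endpoint = "http://127.0.0.1:5555"
            # s3fs and boto3 expect some credentials to exist, even fake ones.
            monkeypatch.setenv("AWS_ACCESS_KEY_ID", "testing")
            monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")

            bucket = "mock-bucket"
            key = "mock.csv"
            s3_client = boto3.client("s3", region_name="us-east-1", endpoint_url=endpoint)
            s3_client.create_bucket(Bucket=bucket)
            s3_client.put_object(Bucket=bucket, Key=key, Body="key,value\nk1,v1\nk2,v2")

            # pandas dispatches s3:// URLs to s3fs; the endpoint override is
            # forwarded to s3fs via storage_options.
            df = pd.read_csv(
                f"s3://{bucket}/{key}",
                storage_options={"client_kwargs": {"endpoint_url": endpoint}},
            )

            assert list(df.columns) == ["key", "value"]
            assert df["key"].tolist() == ["k1", "k2"]
            assert df["value"].tolist() == ["v1", "v2"]
        finally:
            server.stop()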