diff --git a/dev/docker-compose.yaml b/dev/docker-compose.yaml
index 2f0d08fbd9..21920c9ce6 100644
--- a/dev/docker-compose.yaml
+++ b/dev/docker-compose.yaml
@@ -188,18 +188,22 @@ services:
 
   # Provision service - creates test data via Spark
   provision:
-    image: python:3.12-slim
+    image: ghcr.io/astral-sh/uv:python3.12-bookworm-slim
     networks:
       iceberg_test:
     depends_on:
       spark-iceberg:
         condition: service_healthy
-    entrypoint: ["/bin/sh", "-c", "pip install -q 'pyspark[connect]==4.0.1' && python3 /opt/spark/provision.py && touch /tmp/provision_complete && tail -f /dev/null"]
+    entrypoint: ["/bin/sh", "-c", "uv run /opt/spark/provision.py && touch /tmp/provision_complete && tail -f /dev/null"]
     volumes:
       - ./spark/provision.py:/opt/spark/provision.py:ro
+      - uv-cache:/root/.cache/uv
     healthcheck:
       test: ["CMD-SHELL", "[ -f /tmp/provision_complete ]"]
       interval: 2s
       timeout: 2s
       retries: 90
       start_period: 20s
+
+volumes:
+  uv-cache:
diff --git a/dev/spark/provision.py b/dev/spark/provision.py
index c53a1dd842..40f9ba0f38 100644
--- a/dev/spark/provision.py
+++ b/dev/spark/provision.py
@@ -15,6 +15,13 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#   "pyspark[connect]==4.0.1",
+# ]
+# ///
+
 from pyspark.sql import SparkSession
 from pyspark.sql.functions import current_date, date_add, expr
 