|
FROM ubuntu:22.04

# Environment shared by the build steps below and by the running container:
# apt/timezone behaviour, Django settings module, MongoDB connection
# parameters, and pip defaults.
# NOTE(review): DEBIAN_FRONTEND set via ENV persists into the running
# container; it is kept here because the entrypoint runs an install step at
# run time that may depend on it — confirm before moving it to ARG.
ENV DEBIAN_FRONTEND=noninteractive \
    TZ=Asia/Jerusalem \
    DJANGO_SETTINGS_MODULE=sefaria.settings \
    MONGO_HOST=mongodb \
    MONGO_PORT=27017 \
    MONGO_DB_NAME=sefaria \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1
| 11 | + |
# Install base system dependencies and add the deadsnakes PPA for Python 3.9.
# software-properties-common (which provides add-apt-repository) and gpg-agent
# are installed first so the PPA can be registered; the package index is then
# refreshed and the remaining packages are installed in the same layer.
#
# get-pip.py is downloaded to a file with `curl -f` instead of being piped
# into the interpreter: under the default /bin/sh there is no pipefail, so a
# failed download would hand python3.9 an empty stdin, python would exit 0 and
# the build would silently continue without pip.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        gpg-agent \
        software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y --no-install-recommends \
        aria2 \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        jq \
        libpq-dev \
        libre2-dev \
        netcat-openbsd \
        ninja-build \
        pybind11-dev \
        python3.9 \
        python3.9-dev \
        python3.9-distutils \
        python3.9-venv \
        sudo \
        tar \
        unzip \
        wget \
        zstd \
    && curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py \
    && python3.9 /tmp/get-pip.py \
    && rm -f /tmp/get-pip.py \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
| 44 | + |
# Make python3.9 the default interpreter: register /usr/bin/python3.9 with
# update-alternatives (priority 1) under both the `python` and `python3`
# names. The loop stops at the first failing registration.
RUN for cmd in python python3; do \
        update-alternatives --install "/usr/bin/${cmd}" "${cmd}" /usr/bin/python3.9 1 || exit 1; \
    done
| 48 | + |
# Install MongoDB Database Tools, picking the release tarball that matches the
# image architecture reported by dpkg.
#   amd64 -> "x86_64" tarball, arm64 -> "arm64" tarball.
# Any other architecture aborts the build instead of silently installing
# x86_64 binaries that could not run on it.
# The tarball is downloaded and unpacked under /tmp, its bin/ contents are
# moved to /usr/local/bin, and the download artefacts are removed in the same
# layer so they do not remain in the image.
ENV TOOLS_VER=100.9.4
RUN set -eu; \
    DPKG_ARCH="$(dpkg --print-architecture)"; \
    case "${DPKG_ARCH}" in \
        amd64) MONGO_ARCH="x86_64" ;; \
        arm64) MONGO_ARCH="arm64" ;; \
        *) echo "Unsupported architecture for MongoDB Database Tools: ${DPKG_ARCH}" >&2; exit 1 ;; \
    esac; \
    TOOLS_PKG="mongodb-database-tools-ubuntu2204-${MONGO_ARCH}-${TOOLS_VER}"; \
    wget -q -O "/tmp/${TOOLS_PKG}.tgz" "https://fastdl.mongodb.org/tools/db/${TOOLS_PKG}.tgz"; \
    tar -xzf "/tmp/${TOOLS_PKG}.tgz" -C /tmp; \
    mv "/tmp/${TOOLS_PKG}"/bin/* /usr/local/bin/; \
    rm -rf "/tmp/${TOOLS_PKG}" "/tmp/${TOOLS_PKG}.tgz"
| 61 | + |
# Install GitHub CLI (optional, for releases) from the cli.github.com apt
# repository.
# The archive keyring is written directly by `curl -f -o` rather than piped
# through dd: without pipefail a failed download would leave an empty keyring
# and only surface later as an unrelated-looking apt error.
# The repository entry is restricted to the image's dpkg architecture and to
# packages signed by that keyring.
RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
        -o /usr/share/keyrings/githubcli-archive-keyring.gpg \
    && chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
        > /etc/apt/sources.list.d/github-cli.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends gh \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
| 70 | + |
# /app is the working directory for the remaining build steps and for the
# container's entrypoint.
WORKDIR /app

# Copy every shell script and Python file from the top level of the build
# context into /app.
COPY *.sh *.py /app/

# Set the executable bit on the copied shell scripts.
RUN chmod +x ./*.sh
| 78 | + |
# Exports directory. Its path is published as SEFARIA_EXPORT_PATH
# (presumably read by the pipeline scripts — confirm) and the directory is
# created at build time.
ENV SEFARIA_EXPORT_PATH=/app/exports
RUN mkdir -p "${SEFARIA_EXPORT_PATH}"
| 82 | + |
# Create the entrypoint script.
# The script is emitted with printf '%s\n', one argument per output line, so
# its content does not depend on whether the build shell's `echo` expands
# backslash escapes such as \n (dash does, bash does not). The generated file
# is identical to the one the previous echo-based form produced under dash.
# Every argument is single-quoted so $MONGO_* are written literally and only
# expanded when the container runs.
#
# The generated script (bash, `set -e`):
#   1. prints the MongoDB host/port and database name from the environment,
#   2. runs 11_wait_for_mongodb.sh,
#   3. runs the numbered pipeline scripts in the order listed; step 06 is
#      best-effort (`|| true`) and step 08 runs only if step 07 fails,
#   4. lists /app/exports.
# Scripts are invoked with relative paths, so the script relies on the
# container's working directory being /app.
RUN printf '%s\n' \
    '#!/bin/bash' \
    'set -e' \
    '' \
    'echo "=== Sefaria Export Pipeline ==="' \
    'echo "MongoDB: $MONGO_HOST:$MONGO_PORT"' \
    'echo "Database: $MONGO_DB_NAME"' \
    'echo ""' \
    '' \
    '# Wait for MongoDB' \
    'echo "Waiting for MongoDB..."' \
    './11_wait_for_mongodb.sh' \
    '' \
    '# Run the export pipeline' \
    'echo "Starting export pipeline..."' \
    '' \
    './01_compute_timestamp.sh' \
    './04_download_small_dump.sh' \
    './05_clone_sefaria_project.sh' \
    './06_install_build_deps.sh || true' \
    './07_pip_install_requirements.sh || ./08_fallback_built_google_re2.sh' \
    './09_create_exports_dir.sh' \
    './10_create_local_settings.sh' \
    './12_restore_db_from_dump.sh' \
    './13_check_export_module.sh' \
    './14_run_exports.sh' \
    './15_verify_exports.sh' \
    './16_drop_db.sh' \
    './17a_remove_english_in_exports.sh' \
    './17b_flatten_hebrew_in_exports.sh' \
    './17_build_combined_archive.sh' \
    './18_split_archive.sh' \
    '' \
    'echo ""' \
    'echo "=== Export complete! ==="' \
    'echo "Archives available in /app/exports"' \
    'ls -lah /app/exports/' \
    '' \
    > /app/entrypoint.sh \
    && chmod +x /app/entrypoint.sh

ENTRYPOINT ["/app/entrypoint.sh"]
0 commit comments