Update CI #2795
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow: builds the project on Linux, runs the unit tests, builds Python
# wheels, and exercises the wheels on Ubuntu 22.04 and 24.04.
#
# Every root job is gated on the pull request being freshly opened or carrying
# the `run-ci` label.
# NOTE(review): on `push` events there is no `github.event.action` and no
# pull_request payload, so the job-level `if:` conditions below evaluate to
# false and push runs skip every job — confirm this is intended.
name: 'Build & Test (Linux)'

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]
    types: [opened, synchronize, reopened, labeled]

jobs:
  # Full build with ASAN enabled, then `make test` with etcd and the HTTP
  # metadata server started beforehand, then a wheel build.
  build:
    if: >-
      github.event.action == 'opened' ||
      contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        python-version: ['3.10', '3.12']
    env:
      SCCACHE_GHA_ENABLED: "true"
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install and start etcd
        run: |
          wget https://github.com/etcd-io/etcd/releases/download/v3.6.1/etcd-v3.6.1-linux-amd64.tar.gz
          tar xzf etcd-v3.6.1-linux-amd64.tar.gz
          sudo mv etcd-v3.6.1-linux-amd64/etcd* /usr/local/bin/
          etcd --advertise-client-urls http://127.0.0.1:2379 --listen-client-urls http://127.0.0.1:2379 &
          # Poll for readiness instead of relying on a fixed sleep, so a slow
          # runner does not fail the health check spuriously.
          for _ in $(seq 1 30); do
            if etcdctl --endpoints=http://127.0.0.1:2379 endpoint health; then
              exit 0
            fi
            sleep 1
          done
          echo "ERROR: etcd did not become healthy within 30 seconds"
          exit 1
        shell: bash
      - name: Free up disk space
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
      - name: Install CUDA Toolkit
        # NOTE(review): the version ref reads "[email protected]", which looks
        # like an email-obfuscation artifact — restore the real pinned tag.
        uses: Jimver/[email protected]
        with:
          cuda: '12.8.1'
          linux-local-args: '["--toolkit"]'
          method: 'network'
          sub-packages: '["nvcc"]'
      - name: Run sccache-cache
        # NOTE(review): version ref looks obfuscated ("[email protected]") —
        # restore the real pinned tag.
        uses: mozilla-actions/[email protected]
      - name: Configure sccache
        # Re-exports the two runner variables so that later steps see them in
        # their environment (presumably for sccache's GitHub Actions backend).
        uses: actions/github-script@v7
        with:
          script: |
            core.exportVariable('ACTIONS_RESULTS_URL', process.env.ACTIONS_RESULTS_URL || '');
            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
      - name: Run sccache stat for check
        shell: bash
        run: ${SCCACHE_PATH} --show-stats
      - name: Configure project
        run: |
          sudo apt update -y
          sudo bash -x dependencies.sh -y
          mkdir build
          cd build
          cmake .. -DUSE_HTTP=ON -DUSE_ETCD=ON -DSTORE_USE_ETCD=ON -DENABLE_ASAN=ON -DENABLE_SCCACHE=ON
        shell: bash
      - name: Build project
        run: |
          cd build
          # A bare `-j` places no limit on parallel jobs; bound it to the
          # number of CPUs so the runner does not run out of memory.
          make -j"$(nproc)"
          sudo make install
        shell: bash
      - name: Build nvlink_allocator.so
        run: |
          mkdir -p build/mooncake-transfer-engine/nvlink-allocator
          cd mooncake-transfer-engine/nvlink-allocator
          export LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LIBRARY_PATH
          bash build.sh ../../build/mooncake-transfer-engine/nvlink-allocator/
        shell: bash
      - name: Start Metadata Server
        run: |
          cd mooncake-transfer-engine/example/http-metadata-server-python
          pip install aiohttp
          python ./bootstrap_server.py &
        shell: bash
      - name: Test (in build env)
        run: |
          cd build
          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
          ldconfig -v || echo "always continue"
          MC_METADATA_SERVER=http://127.0.0.1:8080/metadata DEFAULT_KV_LEASE_TTL=500 make test -j ARGS="-V"
        shell: bash
      - name: Generate Python version tag
        id: generate_tag_build
        # Strips the dot from the matrix version, e.g. 3.10 -> 310.
        run: |
          echo "python_version_tag=$(echo ${{ matrix.python-version }} | tr -d '.')" >> "$GITHUB_OUTPUT"
        shell: bash
      - name: Build Python wheel
        run: |
          # Build wheel with specific Python version
          PYTHON_VERSION=${{ matrix.python-version }} OUTPUT_DIR=dist-py${{ steps.generate_tag_build.outputs.python_version_tag }} ./scripts/build_wheel.sh
        shell: bash

  # Downloads the wheel artifact uploaded by `build-flags` and runs the
  # installation test, the test suite, and a tensor API check against it.
  test-wheel-ubuntu:
    needs: build-flags
    strategy:
      matrix:
        ubuntu-version: [ubuntu-22.04, ubuntu-24.04]
        python-version: ['3.10', '3.12']
    runs-on: ${{ matrix.ubuntu-version }}
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Generate Python version tag
        id: generate_tag_test
        run: |
          echo "python_version_tag=$(echo ${{ matrix.python-version }} | tr -d '.')" >> "$GITHUB_OUTPUT"
        shell: bash
      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: mooncake-wheel-ubuntu-py${{ steps.generate_tag_test.outputs.python_version_tag }}
          path: mooncake-wheel/dist
      - name: Verify wheel file exists
        run: |
          ls -la mooncake-wheel/dist/
          # `[ -f dir/*.whl ]` errors out when the glob matches more than one
          # file; compgen -G succeeds when at least one path matches.
          if ! compgen -G "mooncake-wheel/dist/*.whl" > /dev/null; then
            echo "ERROR: No wheel file found in mooncake-wheel/dist/"
            exit 1
          fi
        shell: bash
      - name: Run installation test script
        run: |
          bash scripts/test_installation.sh
        shell: bash
      - name: Start metadata server
        run: |
          source test_env/bin/activate
          mooncake_http_metadata_server --port 8080 &
        shell: bash
      - name: Run tests with ssd
        run: |
          source test_env/bin/activate
          MC_STORE_MEMCPY=false TEST_SSD_OFFLOAD_IN_EVICT=true ./scripts/run_tests.sh
          deactivate
        shell: bash
      - name: Start Mooncake Master
        run: |
          source test_env/bin/activate
          mkdir -p /tmp/mooncake_storage
          mooncake_master \
            --eviction_high_watermark_ratio=0.95 \
            --cluster_id=ci_test_cluster \
            --port 50051 &
          sleep 3
        shell: bash
      - name: Run Python Tensor API Performance Test (CI check)
        env:
          MOONCAKE_MASTER: "127.0.0.1:50051"
          MOONCAKE_TE_META_DATA_SERVER: "http://127.0.0.1:8080/metadata"
          MOONCAKE_PROTOCOL: "tcp"
          LOCAL_HOSTNAME: "127.0.0.1"
        run: |
          source test_env/bin/activate
          python scripts/test_tensor_api.py -n 1
        shell: bash

  # Creates a job on tone.openanolis.cn for this run's py312 artifact and polls
  # it until it reports pass/fail. Both steps are skipped when the
  # TONE_USER_NAME secret is empty.
  test-sglang-integration:
    needs: build-flags
    runs-on: ubuntu-latest
    env:
      # Lets the step-level `if:` conditions detect whether the secret is set.
      tone_user_name: ${{ secrets.TONE_USER_NAME }}
    steps:
      - name: trigger T-one test
        if: ${{ env.tone_user_name != '' }}
        env:
          # Secrets are passed through the environment rather than pasted into
          # the script text, so characters such as `$`, `"` or `\` in a secret
          # cannot alter the shell code.
          TONE_USER_NAME: ${{ secrets.TONE_USER_NAME }}
          TONE_USER_TOKEN: ${{ secrets.TONE_USER_TOKEN }}
        run: |
          curl -L -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/artifacts > artifact.json
          cat artifact.json
          artifact_id=$(jq -r ".artifacts[] | select(.name | contains(\"py312\") ) | .id" artifact.json)
          # Signature is base64("<user>|<token>|<unix timestamp>").
          signature="${TONE_USER_NAME}|${TONE_USER_TOKEN}|$(python3 -c "import time;print(time.time())")"
          signature="$(printf '%s' "$signature" | base64 -w0)"
          curl -s -H 'Content-Type: application/json' -X POST -d "{\"workspace\":\"mooncake_test\",\"project\":\"mooncake-ci\",\"template\":\"mooncake-ci-test\",\"name\":\"mooncake-ci-${{ github.sha }}\",\"username\":\"${TONE_USER_NAME}\",\"env_ifs\":\" \",\"env_info\":\"ARTIFACT_ID=${artifact_id} GIT_REPO=${{ github.repository }}\",\"signature\":\"$signature\"}" https://tone.openanolis.cn/api/job/create/ > job.json
          if [ "$(jq .code job.json)" == 200 ]; then
            echo "job created"
          else
            echo "job create failed"
            exit 1
          fi
          job_id=$(jq .data.id job.json)
          echo "check job status here and remember to cancel it before restart the job !"
          echo "job_url: https://tone.openanolis.cn/ws/gclfnh19/test_result/${job_id}"
          # Hand the job id to the next step through the job environment.
          echo "job_id=${job_id}" >> "$GITHUB_ENV"
        shell: bash
      - name: qurey job results
        if: ${{ env.tone_user_name != '' }}
        env:
          TONE_USER_NAME: ${{ secrets.TONE_USER_NAME }}
          TONE_USER_TOKEN: ${{ secrets.TONE_USER_TOKEN }}
        run: |
          # Poll every 10 seconds; give up after more than 720 iterations.
          time=0
          while true; do
            if [ $time -gt 720 ]; then
              echo "timeout"
              exit 1
            fi
            signature="${TONE_USER_NAME}|${TONE_USER_TOKEN}|$(python3 -c "import time;print(time.time())")"
            signature="$(printf '%s' "$signature" | base64 -w0)"
            curl -s -H 'Content-Type: application/json' -X POST -d "{\"username\":\"${TONE_USER_NAME}\", \"signature\":\"$signature\", \"job_id\": \"${job_id}\"}" https://tone.openanolis.cn/api/job/query/ > job_status.json
            if ! [ "$(jq .code job_status.json)" == 200 ]; then
              echo "job query failed"
              exit 1
            fi
            job_status=$(jq .data.job_second_state job_status.json)
            if [[ $job_status =~ "pass" ]]; then
              echo "job successful !"
              exit 0
            elif [[ $job_status =~ "fail" ]] ; then
              echo "job failed or stopped !"
              exit 1
            fi
            time=$(( time + 1))
            sleep 10
          done
        shell: bash

  # Builds the project under several CMake flag combinations, then builds the
  # Python wheel and uploads it as the artifact consumed by the jobs that
  # declare `needs: build-flags`.
  build-flags:
    if: >-
      github.event.action == 'opened' ||
      contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        python-version: ['3.10', '3.12']
    env:
      BUILD_WITH_EP: "1"
      SCCACHE_GHA_ENABLED: "true"
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Free up disk space
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo rm -rf /usr/local/lib/android
          df -h
      - name: Install CUDA Toolkit
        # NOTE(review): the version ref reads "[email protected]", which looks
        # like an email-obfuscation artifact — restore the real pinned tag.
        uses: Jimver/[email protected]
        with:
          cuda: '12.8.1'
          linux-local-args: '["--toolkit"]'
          method: 'network'
          sub-packages: '["nvcc", "nvrtc-dev"]'
          non-cuda-sub-packages: '["libcusparse-dev", "libcublas-dev", "libcusolver-dev"]'
      - name: Run sccache-cache
        # NOTE(review): version ref looks obfuscated ("[email protected]") —
        # restore the real pinned tag.
        uses: mozilla-actions/[email protected]
      - name: Configure sccache
        # Re-exports the two runner variables so that later steps see them in
        # their environment (presumably for sccache's GitHub Actions backend).
        uses: actions/github-script@v7
        with:
          script: |
            core.exportVariable('ACTIONS_RESULTS_URL', process.env.ACTIONS_RESULTS_URL || '');
            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
      - name: Run sccache stat for check
        shell: bash
        run: ${SCCACHE_PATH} --show-stats
      - name: Install dependencies
        run: |
          sudo apt update -y
          sudo bash -x dependencies.sh -y
          pip install torch==2.8.0
          df -h
        shell: bash
      - name: Build transfer engine only
        run: |
          cd mooncake-transfer-engine
          mkdir build
          cd build
          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
          export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH
          cmake .. -DUSE_ETCD=OFF -DUSE_REDIS=ON -DUSE_HTTP=ON -DWITH_METRICS=ON -DBUILD_UNIT_TESTS=ON -DBUILD_EXAMPLES=ON -DENABLE_SCCACHE=ON -DUSE_CUDA=OFF -DUSE_MNNVL=OFF -DCMAKE_EXE_LINKER_FLAGS="-L/usr/local/cuda/lib64/stubs"
          make -j4
          sudo make install
          df -h
        shell: bash
      - name: Configure project with all settings are ON
        run: |
          mkdir build
          cd build
          cmake .. -DUSE_ETCD=ON -DUSE_REDIS=ON -DUSE_HTTP=ON -DWITH_STORE=ON -DWITH_P2P_STORE=ON -DWITH_EP=ON -DWITH_METRICS=ON -DBUILD_UNIT_TESTS=ON -DBUILD_EXAMPLES=ON -DENABLE_SCCACHE=ON -DUSE_CUDA=OFF -DUSE_MNNVL=OFF -DCMAKE_EXE_LINKER_FLAGS="-L/usr/local/cuda/lib64/stubs"
        shell: bash
      # TODO: lack USE_NVMEOF,USE_CUDA,USE_MNNVL
      - name: Build project with all settings are ON
        run: |
          cd build
          make -j4
          sudo make install
          df -h
        shell: bash
      - name: Configure project with unit tests and examples
        run: |
          cd build
          cmake .. -DBUILD_UNIT_TESTS=ON -DBUILD_EXAMPLES=ON -DENABLE_SCCACHE=ON
        shell: bash
      # TODO: lack WITH_RUST_EXAMPLE
      - name: Build project with unit tests and examples
        run: |
          cd build
          make -j4
          sudo make install
        shell: bash
      - name: Configure project
        run: |
          cd build
          rm -r */tests
          cmake .. -DBUILD_UNIT_TESTS=OFF -DBUILD_EXAMPLES=OFF -DUSE_HTTP=ON -DENABLE_SCCACHE=ON
        shell: bash
      - name: Build project
        run: |
          cd build
          make -j4
          sudo make install
        shell: bash
      - name: Build nvlink_allocator.so
        run: |
          mkdir -p build/mooncake-transfer-engine/nvlink-allocator
          cd mooncake-transfer-engine/nvlink-allocator
          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
          export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH
          export LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LIBRARY_PATH
          bash build.sh ../../build/mooncake-transfer-engine/nvlink-allocator/
        shell: bash
      - name: Generate Python version tag
        id: generate_tag_flags
        run: |
          echo "python_version_tag=$(echo ${{ matrix.python-version }} | tr -d '.')" >> "$GITHUB_OUTPUT"
        shell: bash
      - name: Build Python wheel
        run: |
          # Build wheel with specific Python version
          PYTHON_VERSION=${{ matrix.python-version }} OUTPUT_DIR=dist-py${{ steps.generate_tag_flags.outputs.python_version_tag }} ./scripts/build_wheel.sh
        shell: bash
      - name: Upload Python wheel artifact
        uses: actions/upload-artifact@v4
        with:
          name: mooncake-wheel-ubuntu-py${{ steps.generate_tag_flags.outputs.python_version_tag }}
          path: mooncake-wheel/dist-py${{ steps.generate_tag_flags.outputs.python_version_tag }}/*.whl

  # Checks that the repository's Dockerfile still builds.
  build-docker:
    name: Build Docker Image
    if: >-
      github.event.action == 'opened' ||
      contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        # v3 is the current major of this action; v2 is the outdated one.
        uses: docker/setup-buildx-action@v3
      - name: Build Docker image
        run: docker build -t mooncake-app .

  spell-check:
    name: Spell Check with Typos
    if: >-
      github.event.action == 'opened' ||
      contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout Actions Repository
        uses: actions/checkout@v4
      - name: Spell Check Repo
        # NOTE(review): version ref looks obfuscated ("[email protected]") —
        # restore the real pinned tag.
        uses: crate-ci/[email protected]

  # Reformats every *.h / *.cpp file with clang-format-20 and fails when that
  # produces a diff.
  clang-format:
    name: Check code format
    if: >-
      github.event.action == 'opened' ||
      contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout Actions Repository
        uses: actions/checkout@v4
      - name: Install clang-format 20
        run: |
          wget https://apt.llvm.org/llvm.sh
          chmod +x llvm.sh
          sudo ./llvm.sh 20
          sudo apt-get install -y clang-format-20
      - name: run clang-format-20
        run: |
          # the old clang-format-14 which is the default version in ubuntu 22.04,
          # is inconsistent with clang-format-20.
          ls -lh /usr/bin/clang-format*
          clang-format --version
          clang-format-20 --version
          # skip cachelib_memory_allocator
          find . -type f \( -name "*.h" -o -name "*.cpp" \) | grep -v cachelib_memory_allocator | xargs clang-format-20 -style=file -i
          if ! git diff --exit-code; then
            echo "Please follow the .clang-format code style, try clang-format -i FILENAME"
            exit 1
          fi
        shell: bash