-
Notifications
You must be signed in to change notification settings - Fork 13.5k
ggml : add repack testing support #16182
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 7 commits
9e38863
3a91c34
9ce53a3
3d23252
440abf4
49f1e76
fe21170
17b2e26
0e9c0b7
2322b1b
8c3eb6c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| name: Test CPU Variants | ||
|
|
||
| on: | ||
| workflow_dispatch: | ||
| inputs: | ||
| operation: | ||
| description: 'Operation to test (e.g., MUL_MAT or full spec)' | ||
| required: false | ||
| default: 'MUL_MAT' | ||
| type: string | ||
| variant: | ||
| description: 'CPU variant to test (leave empty to list available variants)' | ||
| required: false | ||
| default: '' | ||
| type: string | ||
|
|
||
| jobs: | ||
| test-cpu-variant-sve: | ||
| runs-on: ubuntu-24.04-arm | ||
| steps: | ||
| - name: Clone | ||
| uses: actions/checkout@v4 | ||
|
|
||
| - name: Dependencies | ||
| run: | | ||
| sudo apt-get update | ||
| sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y | ||
| sudo apt-get update | ||
| sudo apt-get install build-essential gcc-14 g++-14 | ||
| sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 | ||
| sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 | ||
| gcc --version | ||
|
|
||
| - name: Build with CPU reference backend | ||
| run: | | ||
| cmake -B build -S . \ | ||
| -DGGML_CPU_REF_BACKEND=ON \ | ||
| -DGGML_CPU_ALL_VARIANTS=ON \ | ||
| -DGGML_CPU_REPACK=ON \ | ||
| -DGGML_NATIVE=OFF \ | ||
| -DGGML_BACKEND_DL=ON \ | ||
| -DGGML_BLAS=OFF \ | ||
| -DLLAMA_CURL=OFF \ | ||
| -DCMAKE_BUILD_TYPE=Release | ||
|
|
||
| cmake --build build -j8 | ||
|
|
||
| - name: List available CPU variants | ||
| run: | | ||
| echo "Available CPU variants:" | ||
| ./build/bin/test-backend-ops cpu-variants --list | ||
|
|
||
| - name: Test CPU variant | ||
| if: ${{ inputs.variant != '' }} | ||
| run: | | ||
| echo "Testing variant: ${{ inputs.variant }}" | ||
| echo "Operation: ${{ inputs.operation }}" | ||
| ./build/bin/test-backend-ops cpu-variants \ | ||
| --variant ${{ inputs.variant }} \ | ||
| -o "${{ inputs.operation }}" | ||
|
|
||
| - name: Instructions | ||
| if: ${{ inputs.variant == '' }} | ||
| run: | | ||
| echo "==========================================" | ||
| echo "No variant specified - only listed available variants above" | ||
| echo "To test a specific variant, re-run this workflow with:" | ||
| echo " - variant: one of the variants listed above" | ||
| echo " - operation: your operation string (default: MUL_MAT)" | ||
| echo "==========================================" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1869,8 +1869,45 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons | |
| return nullptr; | ||
| } | ||
|
|
||
| static bool supports_tensor(const struct ggml_tensor * op) { | ||
| if (op->op == GGML_OP_MUL_MAT && | ||
| op->src[0]->buffer && | ||
| (ggml_n_dims(op->src[0]) == 2) && (ggml_n_dims(op->src[1]) == 2) && | ||
| ggml_repack_get_optimal_repack_type(op->src[0])) { | ||
|
|
||
| if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) { | ||
| return false; | ||
| } | ||
|
|
||
| if (op->src[1]->type == GGML_TYPE_F32) { | ||
| return true; | ||
| } | ||
|
|
||
| } else if (op->op == GGML_OP_MUL_MAT_ID && op->src[0]->buffer && | ||
| (ggml_n_dims(op->src[0]) == 3) && (ggml_n_dims(op->src[1]) == 2) && | ||
| ggml_repack_get_optimal_repack_type(op->src[0])) { | ||
|
|
||
| if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) { | ||
| return false; | ||
| } | ||
|
|
||
| if (op->src[1]->type == GGML_TYPE_F32) { | ||
| return true; | ||
| } | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| static enum ggml_status ggml_backend_cpu_repack_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { | ||
| tensor->extra = (void *) const_cast<ggml::cpu::tensor_traits *>(ggml_repack_get_optimal_repack_type(tensor)); | ||
| if (tensor->op == GGML_OP_NONE) { | ||
| tensor->extra = (void *) const_cast<ggml::cpu::tensor_traits *>(ggml_repack_get_optimal_repack_type(tensor)); | ||
| tensor->buffer = buffer; | ||
| } | ||
|
|
||
| if (supports_tensor(tensor)) { | ||
| tensor->src[0]->extra = (void *) const_cast<ggml::cpu::tensor_traits *>(ggml_repack_get_optimal_repack_type(tensor->src[0])); | ||
| tensor->src[0]->buffer = buffer; | ||
| } | ||
|
||
|
|
||
| GGML_UNUSED(buffer); | ||
| return GGML_STATUS_SUCCESS; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use `GGML_SYSTEM_ARCH` instead.