From 2105dc3562f1a391704fc26fc729e2467325270d Mon Sep 17 00:00:00 2001
From: nipeone
Date: Thu, 27 Mar 2025 15:02:42 +0800
Subject: [PATCH 01/11] add support for linux-arm64

---
 .github/workflows/compile.yml             | 33 +++++++++++----
 LLama/LLamaSharp.Runtime.targets          | 18 ++++++++
 LLama/Native/Load/NativeLibraryUtils.cs   | 42 +++++++++++++------
 LLama/Native/Load/NativeLibraryWithAvx.cs | 16 ++++---
 .../build/LLamaSharp.Backend.Cpu.nuspec   |  8 +++-
 5 files changed, 91 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
index 5b84f6753..6226d7379 100644
--- a/.github/workflows/compile.yml
+++ b/.github/workflows/compile.yml
@@ -28,13 +28,25 @@ jobs:
         include:
           - build: 'noavx'
             defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
+            os: ubuntu-20.04
+            arch: x64
           - build: 'avx2'
            defines: ''
+            os: ubuntu-20.04
+            arch: x64
           - build: 'avx'
             defines: '-DGGML_AVX2=OFF'
+            os: ubuntu-20.04
+            arch: x64
           - build: 'avx512'
             defines: '-DGGML_AVX512=ON'
-    runs-on: ubuntu-20.04
+            os: ubuntu-20.04
+            arch: x64
+          - build: 'aarch64'
+            defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=generic'
+            os: ubuntu-20.04-arm
+            arch: arm64
+    runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -52,28 +64,28 @@ jobs:
      - uses: actions/upload-artifact@v4
        with:
          path: ./build/bin/libllama.so
-          name: llama-bin-linux-${{ matrix.build }}-x64.so
+          name: llama-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
          if-no-files-found: error
      - uses: actions/upload-artifact@v4
        with:
          path: ./build/bin/libggml.so
-          name: ggml-bin-linux-${{ matrix.build }}-x64.so
+          name: ggml-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
          if-no-files-found: error
      - uses: actions/upload-artifact@v4
        with:
          path: ./build/bin/libggml-base.so
-          name: ggml-base-bin-linux-${{ matrix.build }}-x64.so
+          name: ggml-base-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
          if-no-files-found: error
      - uses: actions/upload-artifact@v4
        with:
          path: ./build/bin/libggml-cpu.so
-          name: ggml-cpu-bin-linux-${{ matrix.build }}-x64.so
+          name: ggml-cpu-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
          if-no-files-found: error
      - name: Upload Llava
        uses: actions/upload-artifact@v4
        with:
          path: ./build/bin/libllava_shared.so
-          name: llava-bin-linux-${{ matrix.build }}-x64.so
+          name: llava-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
          if-no-files-found: error

  compile-musl:
@@ -601,7 +613,7 @@ jobs:
      - name: Rearrange Files
        run: |
          # Make all directories at once
-          mkdir --parents deps/{noavx,avx,avx2,avx512,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}
+          mkdir --parents deps/{noavx,avx,avx2,avx512,linux-arm64,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}

          # Linux
          cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/noavx/libggml.so
@@ -628,6 +640,13 @@ jobs:
          cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
          cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so

+          # Arm64
+          cp artifacts/ggml-bin-linux-aarch64-arm64.so/libggml.so deps/linux-arm64/libggml.so
+          cp artifacts/ggml-base-bin-linux-aarch64-arm64.so/libggml-base.so deps/linux-arm64/libggml-base.so
+          cp artifacts/ggml-cpu-bin-linux-aarch64-arm64.so/libggml-cpu.so deps/linux-arm64/libggml-cpu.so
+          cp artifacts/llama-bin-linux-aarch64-arm64.so/libllama.so deps/linux-arm64/libllama.so
+          cp artifacts/llava-bin-linux-aarch64-arm64.so/libllava_shared.so deps/linux-arm64/libllava_shared.so
+
          # Musl
          cp artifacts/ggml-bin-musl-noavx-x64.so/libggml.so deps/musl-noavx/libggml.so
diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets
index 22a3e04e1..6714ddec0 100644
--- a/LLama/LLamaSharp.Runtime.targets
+++ b/LLama/LLamaSharp.Runtime.targets
@@ -202,6 +202,24 @@
+    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/linux-arm64/libllama.so">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+      <Link>runtimes/linux-arm64/native/libllama.so</Link>
+    </None>
+    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/linux-arm64/libggml.so">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+      <Link>runtimes/linux-arm64/native/libggml.so</Link>
+    </None>
+    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/linux-arm64/libggml-base.so">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+      <Link>runtimes/linux-arm64/native/libggml-base.so</Link>
+    </None>
+    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/linux-arm64/libggml-cpu.so">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+      <Link>runtimes/linux-arm64/native/libggml-cpu.so</Link>
+    </None>
+
     <None Include="$(MSBuildThisFileDirectory)runtimes/deps/cu11.7.1/libllama.so">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
       <Link>runtimes/linux-x64/native/cuda11/libllama.so</Link>
diff --git a/LLama/Native/Load/NativeLibraryUtils.cs b/LLama/Native/Load/NativeLibraryUtils.cs
index b0e8a792a..9f6457cd1 100644
--- a/LLama/Native/Load/NativeLibraryUtils.cs
+++ b/LLama/Native/Load/NativeLibraryUtils.cs
@@ -88,19 +88,28 @@ internal static IntPtr TryLoadLibrary(NativeLibraryConfig config, out INativeLib
             // On other platforms (Windows, Linux), we need to load the CPU backend from the specified AVX level directory
             // We are using the AVX level supplied by NativeLibraryConfig, which automatically detects the highest supported AVX level for us

-            // ggml-cpu
-            dependencyPaths.Add(Path.Combine(
-                $"runtimes/{os}/native/{NativeLibraryConfig.AvxLevelToString(library.Metadata.AvxLevel)}",
-                $"{libPrefix}ggml-cpu{ext}"
-            ));
-
-            // ggml-cuda
-            if (library.Metadata.UseCuda)
-                dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-cuda{ext}"));
-
-            // ggml-vulkan
-            if (library.Metadata.UseVulkan)
-                dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-vulkan{ext}"));
+            if (os == "linux-arm64")
+            {
+                // linux-arm64 ships a single CPU backend with no AVX level subdirectories
+                dependencyPaths.Add(Path.Combine(
+                    $"runtimes/{os}/native",
+                    $"{libPrefix}ggml-cpu{ext}"
+                ));
+            }
+            else
+            {
+                // ggml-cpu
+                dependencyPaths.Add(Path.Combine(
+                    $"runtimes/{os}/native/{NativeLibraryConfig.AvxLevelToString(library.Metadata.AvxLevel)}",
+                    $"{libPrefix}ggml-cpu{ext}"
+                ));
+
+                // ggml-cuda
+                if (library.Metadata.UseCuda)
+                    dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-cuda{ext}"));
+
+                // ggml-vulkan
+                if (library.Metadata.UseVulkan)
+                    dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-vulkan{ext}"));
+            }
         }
     }
@@ -218,6 +227,13 @@ public static void GetPlatformPathParts(OSPlatform platform, out string os, out

         if (platform == OSPlatform.Linux)
         {
+            if (System.Runtime.Intrinsics.Arm.ArmBase.Arm64.IsSupported)
+            {
+                // linux arm64
+                os = "linux-arm64";
+                fileExtension = ".so";
+                libPrefix = "lib";
+                return;
+            }
             if(RuntimeInformation.RuntimeIdentifier.ToLower().StartsWith("alpine"))
             {
                 // alpine linux distro
diff --git a/LLama/Native/Load/NativeLibraryWithAvx.cs b/LLama/Native/Load/NativeLibraryWithAvx.cs
index 932c49866..e6cbd86f3 100644
--- a/LLama/Native/Load/NativeLibraryWithAvx.cs
+++ b/LLama/Native/Load/NativeLibraryWithAvx.cs
@@ -50,11 +50,17 @@ public IEnumerable<string> Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaL
        private string? GetAvxPath(SystemInfo systemInfo, AvxLevel avxLevel, NativeLogConfig.LLamaLogCallback? logCallback)
        {
            NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix);
-            var avxStr = NativeLibraryConfig.AvxLevelToString(avxLevel);
-            if (!string.IsNullOrEmpty(avxStr))
-                avxStr += "/";
-            var relativePath = $"runtimes/{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}";
-            return relativePath;
+            if (os != "linux-arm64")
+            {
+                var avxStr = NativeLibraryConfig.AvxLevelToString(avxLevel);
+                if (!string.IsNullOrEmpty(avxStr))
+                    avxStr += "/";
+                var relativePath = $"runtimes/{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}";
+                return relativePath;
+            }
+            else
+            {
+                // linux-arm64 has no AVX level subdirectories
+                var relativePath = $"runtimes/{os}/native/{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}";
+                return relativePath;
+            }
        }
    }
}
#endif
diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
index 7c69534da..d173039a9 100644
--- a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
+++ b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
@@ -66,7 +66,13 @@
-
+
+    <file src="runtimes/deps/linux-arm64/libllama.so" target="runtimes/linux-arm64/native/libllama.so" />
+    <file src="runtimes/deps/linux-arm64/libggml.so" target="runtimes/linux-arm64/native/libggml.so" />
+    <file src="runtimes/deps/linux-arm64/libggml-base.so" target="runtimes/linux-arm64/native/libggml-base.so" />
+    <file src="runtimes/deps/linux-arm64/libggml-cpu.so" target="runtimes/linux-arm64/native/libggml-cpu.so" />
+    <file src="runtimes/deps/linux-arm64/libllava_shared.so" target="runtimes/linux-arm64/native/libllava_shared.so" />
+

From bc4dde86ace11771430072fce5f167d8a2a3f382 Mon Sep 17 00:00:00 2001
From: nipeone
Date: Thu, 27 Mar 2025 15:34:22 +0800
Subject: [PATCH 02/11] update compile.yml

---
 .github/workflows/compile.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
index 6226d7379..a472ad584 100644
--- a/.github/workflows/compile.yml
+++ b/.github/workflows/compile.yml
@@ -44,7 +44,7 @@ jobs:
           - build: 'aarch64'
             defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=generic'
-            os: ubuntu-20.04-arm
+            os: ubuntu-22.04-arm
             arch: arm64
     runs-on: ${{ matrix.os }}

From aeef2eb23dd1921a85fcfd0e8137f19d28b100a5 Mon Sep 17 00:00:00 2001
From: nipeone
Date: Thu, 27 Mar 2025 15:45:20 +0800
Subject: [PATCH 03/11] update compile.yml: -DGGML_CPU_ARM_ARCH=armv8-a

---
 .github/workflows/compile.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
index a472ad584..1448c5618 100644
--- a/.github/workflows/compile.yml
+++ b/.github/workflows/compile.yml
@@ -43,7 +43,7 @@ jobs:
             os: ubuntu-20.04
             arch: x64
           - build: 'aarch64'
-            defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=generic'
+            defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=armv8-a'
             os: ubuntu-22.04-arm
             arch: arm64

From 80d75d9281ea53ad49cc4c4e0a2b6b3bac9c84a9 Mon Sep 17 00:00:00 2001
From: nipeone
Date: Thu, 27 Mar 2025 16:08:55 +0800
Subject: [PATCH 04/11] update runtime.targets

---
 LLama/LLamaSharp.Runtime.targets | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets
index 6714ddec0..2972bcadd 100644
--- a/LLama/LLamaSharp.Runtime.targets
+++ b/LLama/LLamaSharp.Runtime.targets
@@ -218,6 +218,10 @@
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
       <Link>runtimes/linux-arm64/native/libggml-cpu.so</Link>
     </None>
+    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/linux-arm64/libllava_shared.so">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+      <Link>runtimes/linux-arm64/native/libllava_shared.so</Link>
+    </None>

From 6f4c53c6770b37e14d6aeb6315ab048c5a657caf Mon Sep 17 00:00:00 2001
From: nipeone
Date: Thu, 3 Apr 2025 12:35:40 +0800
Subject: [PATCH 05/11] add LLamaReranker and tests

---
 LLama.Unittest/Constants.cs                        |   1 +
 LLama.Unittest/LLama.Unittest.csproj               |  10 +-
 LLama.Unittest/LLamaRerankerTests.cs               |  74 ++++++++++
 .../SafeLlamaModelHandleVocabularyTests.cs         |  37 +++++
 LLama/LLamaReranker.cs                             | 137 ++++++++++++++++++
 LLama/Native/SafeLlamaModelHandle.cs               |  18 ++-
 6 files changed, 269 insertions(+), 8 deletions(-)
 create mode 100644 LLama.Unittest/LLamaRerankerTests.cs
 create mode 100644 LLama.Unittest/Native/SafeLlamaModelHandleVocabularyTests.cs
 create mode 100644 LLama/LLamaReranker.cs

diff --git a/LLama.Unittest/Constants.cs b/LLama.Unittest/Constants.cs
index a30951750..59585142a 100644
--- a/LLama.Unittest/Constants.cs
+++ b/LLama.Unittest/Constants.cs
@@ -7,6 +7,7 @@ internal static class Constants
     public static readonly string GenerativeModelPath = "Models/Llama-3.2-1B-Instruct-Q4_0.gguf";
     public static readonly string GenerativeModelPath2 = "Models/smollm-360m-instruct-add-basics-q8_0.gguf";
     public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";
+    public static readonly string RerankingModelPath = "Models/jina-reranker-v1-tiny-en-FP16.gguf";

     public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
     public static readonly string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
diff --git a/LLama.Unittest/LLama.Unittest.csproj b/LLama.Unittest/LLama.Unittest.csproj
index 11b65557e..f407a2e7a 100644
--- a/LLama.Unittest/LLama.Unittest.csproj
+++ b/LLama.Unittest/LLama.Unittest.csproj
@@ -34,8 +34,11 @@
-
-
+    <DownloadFileItem Include="jina-reranker-v1-tiny-en-FP16.gguf">
+      <SourceUrl>https://huggingface.co/gpustack/jina-reranker-v1-tiny-en-GGUF/resolve/main/jina-reranker-v1-tiny-en-FP16.gguf</SourceUrl>
+      <DestinationFolder>Models</DestinationFolder>
+      <DestinationFileName>jina-reranker-v1-tiny-en-FP16.gguf</DestinationFileName>
+    </DownloadFileItem>
@@ -63,6 +66,9 @@
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
+    <None Include="Models/jina-reranker-v1-tiny-en-FP16.gguf">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
     <None Include="Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
diff --git a/LLama.Unittest/LLamaRerankerTests.cs b/LLama.Unittest/LLamaRerankerTests.cs
new file mode 100644
index 000000000..8b786ff9b
--- /dev/null
+++ b/LLama.Unittest/LLamaRerankerTests.cs
@@ -0,0 +1,74 @@
+using LLama.Common;
+using LLama.Extensions;
+using LLama.Native;
+using Microsoft.Extensions.AI;
+using System.Runtime.InteropServices;
+using Xunit.Abstractions;
+
+namespace LLama.Unittest;
+
+public sealed class LLamaRerankerTests
+{
+    private readonly ITestOutputHelper _testOutputHelper;
+    private readonly LLamaReranker _reranker;
+
+    public LLamaRerankerTests(ITestOutputHelper testOutputHelper)
+    {
+        _testOutputHelper = testOutputHelper;
+
+        var @params = new ModelParams(Constants.RerankingModelPath)
+        {
+            ContextSize = 0,
+            PoolingType = LLamaPoolingType.Rank,
+            GpuLayerCount = Constants.CIGpuLayerCount,
+        };
+        using var weights = LLamaWeights.LoadFromFile(@params);
+        _reranker = new LLamaReranker(weights, @params);
+    }
+
+    [Fact]
+    public async Task CompareRerankingScore()
+    {
+        var input = "what is panda?";
+        var documents = new string[] {
+            "hi",
+            "it's a bear",
+            string.Join(", ", "The giant panda (Ailuropoda melanoleuca)",
+                "sometimes called a panda bear or simply panda",
+                "is a bear species endemic to China.")
+        };
+        var scores = await _reranker.GetRelevanceScores(input, documents, normalize: false);
+
+        Assert.True(documents.Length == scores.Count);
+
+        _testOutputHelper.WriteLine($"Rerank score 0: {scores[0]:F4}");
+        _testOutputHelper.WriteLine($"Rerank score 1: {scores[1]:F4}");
+        _testOutputHelper.WriteLine($"Rerank score 2: {scores[2]:F4}");
+    }
+
+    [Fact]
+    public async Task MostRelevantDocument()
+    {
+        var input = "what is panda?";
+        var documents = new string[] {
+            "hi",
+            "it's a bear",
+            string.Join(", ", "The giant panda (Ailuropoda melanoleuca)",
+                "sometimes called a panda bear or simply panda",
+                "is a bear species endemic to China.")
+        };
+        var scores = await _reranker.GetRelevanceScores(input, documents, normalize: true);
+
+        Assert.True(documents.Length == scores.Count);
+
+        int maxIndex = scores
+            .Select((score, index) => new { Score = score, Index = index })
+            .MaxBy(x => x.Score)
+            .Index;
+
+        var maxScoreDocument = documents[maxIndex];
+        Assert.Equal(documents[2], maxScoreDocument);
+    }
+}
diff --git a/LLama.Unittest/Native/SafeLlamaModelHandleVocabularyTests.cs b/LLama.Unittest/Native/SafeLlamaModelHandleVocabularyTests.cs
new file mode 100644
index 000000000..5b8e12ac3
--- /dev/null
+++ b/LLama.Unittest/Native/SafeLlamaModelHandleVocabularyTests.cs
@@ -0,0 +1,37 @@
+using System.Text;
+using System.Xml.Linq;
+using LLama.Common;
+using LLama.Extensions;
+using Microsoft.Extensions.Logging;
+
+namespace LLama.Unittest.Native;
+
+public class SafeLlamaModelHandleVocabularyTests
+{
+    private readonly LLamaWeights _model;
+
+    public SafeLlamaModelHandleVocabularyTests()
+    {
+        var @params = new ModelParams(Constants.RerankingModelPath)
+        {
+            ContextSize = 0,
+            PoolingType = LLama.Native.LLamaPoolingType.Rank,
+            GpuLayerCount = Constants.CIGpuLayerCount
+        };
+        _model = LLamaWeights.LoadFromFile(@params);
+    }
+
+    [Fact]
+    public void GetLLamaTokenString()
+    {
+        var bos = _model.Vocab.BOS;
+        var eos = _model.Vocab.EOS;
+
+        var bosStr = _model.Vocab.LLamaTokenToString(bos, true);
+        var eosStr = _model.Vocab.LLamaTokenToString(eos, true);
+
+        Assert.Equal("<s>", bosStr);
+        Assert.Equal("</s>", eosStr);
+    }
+}
diff --git a/LLama/LLamaReranker.cs b/LLama/LLamaReranker.cs
new file mode 100644
index 000000000..4c0aa2394
--- /dev/null
+++ b/LLama/LLamaReranker.cs
@@ -0,0 +1,137 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+using LLama.Abstractions;
+using LLama.Exceptions;
+using LLama.Native;
+using Microsoft.Extensions.Logging;
+
+namespace LLama;
+
+/// <summary>
+/// Get rank scores between prompt and documents
+/// </summary>
+public sealed partial class LLamaReranker
+    : IDisposable
+{
+    /// <summary>
+    /// string BOS
+    /// </summary>
+    public string StrBOS { get; }
+
+    /// <summary>
+    /// string EOS
+    /// </summary>
+    public string StrEOS { get; }
+
+    /// <summary>
+    /// Dimension of embedding vectors
+    /// </summary>
+    public int EmbeddingSize => Context.EmbeddingSize;
+
+    /// <summary>
+    /// LLama Context
+    /// </summary>
+    public LLamaContext Context { get; }
+
+    /// <summary>
+    /// Create a new reranker, using the given LLamaWeights
+    /// </summary>
+    /// <param name="weights"></param>
+    /// <param name="params"></param>
+    /// <param name="logger"></param>
+    public LLamaReranker(LLamaWeights weights, IContextParams @params, ILogger? logger = null)
+    {
+        if (@params.UBatchSize != @params.BatchSize)
+            throw new ArgumentException("For non-causal models, batch size must be equal to ubatch size", nameof(@params));
+        if (weights.NativeHandle is { HasEncoder: true, HasDecoder: true })
+            throw new NotSupportedException("Computing rank in encoder-decoder models is not supported");
+        if (@params.PoolingType != LLamaPoolingType.Rank)
+            throw new NotSupportedException("Computing rank score, PoolingType must be equal to LLamaPoolingType.Rank");
+
+        Context = weights.CreateContext(@params, logger);
+        NativeApi.llama_set_embeddings(Context.NativeHandle, true);
+        StrBOS = Context.Vocab.LLamaTokenToString(Context.Vocab.BOS, true) ?? "";
+        StrEOS = Context.Vocab.LLamaTokenToString(Context.Vocab.EOS, true) ?? "";
+    }
+
+    /// <inheritdoc />
+    public void Dispose()
+    {
+        Context.Dispose();
+    }
+
+    /// <summary>
+    /// Retrieve relevance scores for input and document by reranking
+    /// </summary>
+    /// <param name="input"></param>
+    /// <param name="documents"></param>
+    /// <param name="normalize">Whether to normalize the score to the range (0, 1)</param>
+    /// <param name="cancellationToken"></param>
+    /// <returns></returns>
+    /// <exception cref="RuntimeError"></exception>
+    /// <exception cref="NotSupportedException"></exception>
+    public async Task<IReadOnlyList<float>> GetRelevanceScores(string input, IReadOnlyList<string> documents, bool normalize = false, CancellationToken cancellationToken = default)
+    {
+        List<float> scores = new List<float>(documents.Count);
+        foreach (var document in documents)
+        {
+            var score = (await GetRelevanceScoreWithTokenCount(input, document, cancellationToken).ConfigureAwait(false)).Score;
+            scores.Add(normalize ? Sigmoid(score) : score);
+        }
+        return scores;
+    }
+
+    private async Task<(float Score, int Tokens)> GetRelevanceScoreWithTokenCount(string input, string document, CancellationToken cancellationToken = default)
+    {
+        var prompt = $"{input}{document}";
+        // Add all of the tokens to the batch
+        var tokens = Context.Tokenize(prompt, special: true);
+        var batch = new LLamaBatch();
+        for (var i = 0; i < tokens.Length; i++)
+            batch.Add(tokens[i], i, LLamaSeqId.Zero, true);
+
+        // clear previous kv_cache values
+        Context.NativeHandle.KvCacheClear();
+
+        // Check if we should cancel the work, just before doing anything expensive (encode/decode)
+        cancellationToken.ThrowIfCancellationRequested();
+
+        // Run model
+        switch (Context.NativeHandle.ModelHandle.HasEncoder, Context.NativeHandle.ModelHandle.HasDecoder)
+        {
+            case (true, false):
+            {
+                var result = await Context.EncodeAsync(batch, cancellationToken);
+                if (result != EncodeResult.Ok)
+                    throw new RuntimeError($"Failed to encode: {result}");
+                break;
+            }
+
+            case (false, true):
+            {
+                var result = await Context.DecodeAsync(batch, cancellationToken);
+                if (result != DecodeResult.Ok)
+                    throw new RuntimeError($"Failed to decode: {result}");
+                break;
+            }
+
+            default:
+                throw new NotSupportedException("Unsupported model type");
+        }
+
+        var score = Context.NativeHandle.GetEmbeddingsSeq(LLamaSeqId.Zero)[0];
+
+        Context.NativeHandle.KvCacheClear();
+
+        return (score, tokens.Length);
+    }
+
+    private float Sigmoid(float x)
+    {
+        return (float)(1 / (1 + Math.Exp(-x)));
+    }
+}
diff --git a/LLama/Native/SafeLlamaModelHandle.cs b/LLama/Native/SafeLlamaModelHandle.cs
index db198ec30..801d25167 100644
--- a/LLama/Native/SafeLlamaModelHandle.cs
+++ b/LLama/Native/SafeLlamaModelHandle.cs
@@ -651,7 +651,18 @@ internal Vocabulary(SafeLlamaModelHandle model)
             _model = model;
         }

-        private string? LLamaTokenToString(LLamaToken? token, bool isSpecialToken)
+        private static LLamaToken? Normalize(LLamaToken token)
+        {
+            return token == -1 ? null : token;
+        }
+
+        /// <summary>
+        /// Translate LLamaToken to String
+        /// </summary>
+        /// <param name="token"></param>
+        /// <param name="isSpecialToken"></param>
+        /// <returns></returns>
+        public string? LLamaTokenToString(LLamaToken? token, bool isSpecialToken)
         {
             if (!token.HasValue)
                 return null;
@@ -676,11 +687,6 @@ internal Vocabulary(SafeLlamaModelHandle model)
             return Encoding.UTF8.GetStringFromSpan(slice);
         }

-        private static LLamaToken? Normalize(LLamaToken token)
-        {
-            return token == -1 ? null : token;
-        }
-
         /// <summary>
         /// Total number of tokens in this vocabulary
         /// </summary>

From c60435924440f5e7184f23028780ed4dda282abb Mon Sep 17 00:00:00 2001
From: nipeone
Date: Fri, 11 Apr 2025 12:55:19 +0800
Subject: [PATCH 06/11] optimize LLamaReranker function

---
 LLama.Unittest/LLamaRerankerTests.cs |  8 +--
 LLama/LLamaReranker.cs               | 83 ++++++++++++++++++++--------
 2 files changed, 65 insertions(+), 26 deletions(-)

diff --git a/LLama.Unittest/LLamaRerankerTests.cs b/LLama.Unittest/LLamaRerankerTests.cs
index 8b786ff9b..30753ffe6 100644
--- a/LLama.Unittest/LLamaRerankerTests.cs
+++ b/LLama.Unittest/LLamaRerankerTests.cs
@@ -61,12 +61,12 @@ public async Task MostRelevantDocument()
         };
         var scores = await _reranker.GetRelevanceScores(input, documents, normalize: true);

+        Assert.NotNull(scores);
         Assert.True(documents.Length == scores.Count);

-        int maxIndex = scores
-            .Select((score, index) => new { Score = score, Index = index })
-            .MaxBy(x => x.Score)
-            .Index;
+        int maxIndex = scores.Select((score, index) => (score, index))
+                             .MaxBy(x => x.score)
+                             .index;

         var maxScoreDocument = documents[maxIndex];
         Assert.Equal(documents[2], maxScoreDocument);
diff --git a/LLama/LLamaReranker.cs b/LLama/LLamaReranker.cs
index 4c0aa2394..a113d2363 100644
--- a/LLama/LLamaReranker.cs
+++ b/LLama/LLamaReranker.cs
@@ -18,16 +18,6 @@ namespace LLama;
 public sealed partial class LLamaReranker
     : IDisposable
 {
-    /// <summary>
-    /// string BOS
-    /// </summary>
-    public string StrBOS { get; }
-
-    /// <summary>
-    /// string EOS
-    /// </summary>
-    public string StrEOS { get; }
-
     /// <summary>
     /// Dimension of embedding vectors
     /// </summary>
     public int EmbeddingSize => Context.EmbeddingSize;
@@ -54,8 +44,6 @@ public LLamaReranker(LLamaWeights weights, IContextParams @params, ILogger? logg
             throw new NotSupportedException("Computing rank score, PoolingType must be equal to LLamaPoolingType.Rank");
         Context = weights.CreateContext(@params, logger);
         NativeApi.llama_set_embeddings(Context.NativeHandle, true);
-        StrBOS = Context.Vocab.LLamaTokenToString(Context.Vocab.BOS, true) ?? "";
-        StrEOS = Context.Vocab.LLamaTokenToString(Context.Vocab.EOS, true) ?? "";
     }

     /// <inheritdoc />
@@ -65,7 +53,7 @@ public void Dispose()
     }

     /// <summary>
-    /// Retrieve relevance scores for input and document by reranking
+    /// Retrieve relevance scores for input and documents by reranking, executed in a single pass
     /// </summary>
     /// <param name="input"></param>
     /// <param name="documents"></param>
@@ -74,22 +62,73 @@ public void Dispose()
     /// <exception cref="RuntimeError"></exception>
     /// <exception cref="NotSupportedException"></exception>
-    public async Task<IReadOnlyList<float>> GetRelevanceScores(string input, IReadOnlyList<string> documents, bool normalize = false, CancellationToken cancellationToken = default) {
+    public async Task<IReadOnlyList<float>> GetRelevanceScores(string input, IReadOnlyList<string> documents, bool normalize = false, CancellationToken cancellationToken = default)
+    {
         List<float> scores = new List<float>(documents.Count);
-        foreach (var document in documents)
-        {
-            var score = (await GetRelevanceScoreWithTokenCount(input, document, cancellationToken).ConfigureAwait(false)).Score;
-            scores.Add(normalize ? Sigmoid(score) : score);
-        }
-        return scores;
-    }
+        var batch = new LLamaBatch();
+        var inputTokens = Context.Tokenize(input);
+        foreach (var (index, document) in documents.Select((item, index) => (index, item)))
+        {
+            var docTokens = Context.Tokenize(document);
+            LLamaToken[] tokens = [.. inputTokens, .. docTokens];
+            for (var i = 0; i < tokens.Length; i++)
+                batch.Add(tokens[i], i, (LLamaSeqId)index, true);
+        }
+
+        // clear previous kv_cache values
+        Context.NativeHandle.KvCacheClear();
+
+        // Check if we should cancel the work, just before doing anything expensive (encode/decode)
+        cancellationToken.ThrowIfCancellationRequested();
+
+        // Run model
+        switch (Context.NativeHandle.ModelHandle.HasEncoder, Context.NativeHandle.ModelHandle.HasDecoder)
+        {
+            case (true, false):
+            {
+                var result = await Context.EncodeAsync(batch, cancellationToken);
+                if (result != EncodeResult.Ok)
+                    throw new RuntimeError($"Failed to encode: {result}");
+                break;
+            }
+
+            case (false, true):
+            {
+                var result = await Context.DecodeAsync(batch, cancellationToken);
+                if (result != DecodeResult.Ok)
+                    throw new RuntimeError($"Failed to decode: {result}");
+                break;
+            }
+
+            default:
+                throw new NotSupportedException("Unsupported model type");
+        }
+
+        for (var i = 0; i < documents.Count; i++)
+        {
+            var score = Context.NativeHandle.GetEmbeddingsSeq((LLamaSeqId)i)[0];
+            scores.Add(normalize ? Sigmoid(score) : score);
+        }
+
+        Context.NativeHandle.KvCacheClear();
+
+        return scores;
+    }

+    /// <summary>
+    /// Retrieve relevance score for input and document by reranking
+    /// </summary>
+    /// <param name="input"></param>
+    /// <param name="document"></param>
+    /// <param name="normalize"></param>
+    /// <param name="cancellationToken"></param>
+    /// <returns></returns>
+    /// <exception cref="RuntimeError"></exception>
+    /// <exception cref="NotSupportedException"></exception>
-    private async Task<(float Score, int Tokens)> GetRelevanceScoreWithTokenCount(string input, string document, CancellationToken cancellationToken = default)
+    public async Task<(float Score, int Tokens)> GetRelevanceScoreWithTokenCount(string input, string document, bool normalize = false, CancellationToken cancellationToken = default)
     {
-        var prompt = $"{input}{document}";
-        // Add all of the tokens to the batch
-        var tokens = Context.Tokenize(prompt, special: true);
+        var inputTokens = Context.Tokenize(input);
+        var docTokens = Context.Tokenize(document);
+        LLamaToken[] tokens = [..inputTokens, ..docTokens];
         var batch = new LLamaBatch();
         for (var i = 0; i < tokens.Length; i++)
             batch.Add(tokens[i], i, LLamaSeqId.Zero, true);
@@ -127,7 +166,7 @@ public async Task<IReadOnlyList<float>> GetRelevanceScores(string input, IReadOn
         var score = Context.NativeHandle.GetEmbeddingsSeq(LLamaSeqId.Zero)[0];

         Context.NativeHandle.KvCacheClear();

-        return (score, tokens.Length);
+        return (normalize ? Sigmoid(score) : score, tokens.Length);
     }

     private float Sigmoid(float x)

From d99670c1e8f9b4360490f2b5ac0e794b8c40e02c Mon Sep 17 00:00:00 2001
From: nipeone
Date: Fri, 11 Apr 2025 16:37:42 +0800
Subject: [PATCH 07/11] fix reranking when documents are too large

---
 LLama/LLamaReranker.cs | 82 +++++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 29 deletions(-)

diff --git a/LLama/LLamaReranker.cs b/LLama/LLamaReranker.cs
index a113d2363..389e44d86 100644
--- a/LLama/LLamaReranker.cs
+++ b/LLama/LLamaReranker.cs
@@ -5,6 +5,7 @@
 using System.Text;
 using System.Threading;
 using System.Threading.Tasks;
+using System.Xml.Linq;
 using LLama.Abstractions;
 using LLama.Exceptions;
 using LLama.Native;
@@ -65,16 +66,52 @@ public void Dispose()
     public async Task<IReadOnlyList<float>> GetRelevanceScores(string input, IReadOnlyList<string> documents, bool normalize = false, CancellationToken cancellationToken = default)
     {
         List<float> scores = new List<float>(documents.Count);
-        var batch = new LLamaBatch();
         var inputTokens = Context.Tokenize(input);
-        foreach (var (index, document) in documents.Select((item, index) => (index, item)))
+        var batch = new LLamaBatch();
+        var clearFlag = 0;
+
+        for (var idx = 0; idx < documents.Count; idx++)
         {
-            var docTokens = Context.Tokenize(document);
+            var docTokens = Context.Tokenize(documents[idx]);
             LLamaToken[] tokens = [.. inputTokens, .. docTokens];
+
+            if (batch.TokenCount + tokens.Length > Context.ContextSize)
+            {
+                scores.AddRange(await CalcRelevanceScores(batch, normalize, cancellationToken));
+                batch.Clear();
+                clearFlag = idx;
+            }
+
             for (var i = 0; i < tokens.Length; i++)
-                batch.Add(tokens[i], i, (LLamaSeqId)index, true);
+                batch.Add(tokens[i], i, (LLamaSeqId)(idx - clearFlag), true);
         }
+        if (batch.LogitPositionCount > 0)
+        {
+            scores.AddRange(await CalcRelevanceScores(batch, normalize, cancellationToken));
+            batch.Clear();
+        }
+
+        return scores;
+    }
+
+    /// <summary>
+    /// Retrieve relevance score for input and document by reranking
+    /// </summary>
+    /// <param name="input"></param>
+    /// <param name="document"></param>
+    /// <param name="normalize"></param>
+    /// <param name="cancellationToken"></param>
+    /// <returns></returns>
+    /// <exception cref="RuntimeError"></exception>
+    /// <exception cref="NotSupportedException"></exception>
+    public async Task<(float Score, int Tokens)> GetRelevanceScoreWithTokenCount(string input, string document, bool normalize = false, CancellationToken cancellationToken = default)
+    {
+        var inputTokens = Context.Tokenize(input);
+        var docTokens = Context.Tokenize(document);
+        LLamaToken[] tokens = [..inputTokens, ..docTokens];
+        var batch = new LLamaBatch();
+        for (var i = 0; i < tokens.Length; i++)
+            batch.Add(tokens[i], i, LLamaSeqId.Zero, true);

         // clear previous kv_cache values
         Context.NativeHandle.KvCacheClear();

         // Check if we should cancel the work, just before doing anything expensive (encode/decode)
         cancellationToken.ThrowIfCancellationRequested();

         // Run model
         switch (Context.NativeHandle.ModelHandle.HasEncoder, Context.NativeHandle.ModelHandle.HasDecoder)
         {
             case (true, false):
             {
                 var result = await Context.EncodeAsync(batch, cancellationToken);
                 if (result != EncodeResult.Ok)
                     throw new RuntimeError($"Failed to encode: {result}");
                 break;
             }

             case (false, true):
             {
                 var result = await Context.DecodeAsync(batch, cancellationToken);
                 if (result != DecodeResult.Ok)
                     throw new RuntimeError($"Failed to decode: {result}");
                 break;
             }

             default:
                 throw new NotSupportedException("Unsupported model type");
         }

         var score = Context.NativeHandle.GetEmbeddingsSeq(LLamaSeqId.Zero)[0];

         Context.NativeHandle.KvCacheClear();

         return (normalize ? Sigmoid(score) : score, tokens.Length);
     }

+    private async Task<List<float>> CalcRelevanceScores(LLamaBatch batch, bool normalize = false, CancellationToken cancellationToken = default)
+    {
+        var (logicCap, _) = batch.GetLogitPositions()[batch.LogitPositionCount - 1];
+        var seqNum = logicCap.Value + 1;
+        List<float> scores = new List<float>(seqNum);
+
+        // clear previous kv_cache values
+        Context.NativeHandle.KvCacheClear();
+
+        // Check if we should cancel the work, just before doing anything expensive (encode/decode)
+        cancellationToken.ThrowIfCancellationRequested();
+
+        // Run model
+        switch (Context.NativeHandle.ModelHandle.HasEncoder, Context.NativeHandle.ModelHandle.HasDecoder)
+        {
+            case (true, false):
+            {
+                var result = await Context.EncodeAsync(batch, cancellationToken);
+                if (result != EncodeResult.Ok)
+                    throw new RuntimeError($"Failed to encode: {result}");
+                break;
+            }
+
+            case (false, true):
+            {
+                var result = await Context.DecodeAsync(batch, cancellationToken);
+                if (result != DecodeResult.Ok)
+                    throw new RuntimeError($"Failed to decode: {result}");
+                break;
+            }
+
+            default:
+                throw new NotSupportedException("Unsupported model type");
+        }
+
+        for (var seq = 0; seq < seqNum; seq++)
+        {
+            var score = Context.NativeHandle.GetEmbeddingsSeq((LLamaSeqId)seq)[0];
+            scores.Add(normalize ? Sigmoid(score) : score);
+        }
+
+        Context.NativeHandle.KvCacheClear();
+
+        return scores;
+    }

     private float Sigmoid(float x)
     {
         return (float)(1 / (1 + Math.Exp(-x)));

From 05677feef5e614e9455223b122a3dee9a1735d85 Mon Sep 17 00:00:00 2001
From: nipeone
Date: Tue, 15 Apr 2025 16:29:06 +0800
Subject: [PATCH 08/11] fix reranking when a document is null

---
 LLama/LLamaReranker.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LLama/LLamaReranker.cs b/LLama/LLamaReranker.cs
index 389e44d86..71c111eb6 100644
--- a/LLama/LLamaReranker.cs
+++ b/LLama/LLamaReranker.cs
@@ -72,7 +72,7 @@ public async Task<IReadOnlyList<float>> GetRelevanceScores(string input, IReadOn
         for (var idx = 0; idx < documents.Count; idx++)
         {
-            var docTokens = Context.Tokenize(documents[idx]);
+            var docTokens = Context.Tokenize(documents[idx] ?? "");
             LLamaToken[] tokens = [.. inputTokens, .. docTokens];
From c62980f23b434de5312fd0f8794855df4b329eac Mon Sep 17 00:00:00 2001
From: Martin Evans
Date: Mon, 21 Apr 2025 14:55:18 +0100
Subject: [PATCH 09/11] Apply suggestions from code review

Upgraded Linux runners to Ubuntu 24.04
---
 .github/workflows/compile.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
index 1448c5618..e91ce7179 100644
--- a/.github/workflows/compile.yml
+++ b/.github/workflows/compile.yml
@@ -28,15 +28,15 @@ jobs:
         include:
           - build: 'noavx'
             defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
-            os: ubuntu-20.04
+            os: ubuntu-24.04
             arch: x64
           - build: 'avx2'
             defines: ''
-            os: ubuntu-20.04
+            os: ubuntu-24.04
             arch: x64
           - build: 'avx'
             defines: '-DGGML_AVX2=OFF'
-            os: ubuntu-20.04
+            os: ubuntu-24.04
             arch: x64
           - build: 'avx512'
             defines: '-DGGML_AVX512=ON'
@@ -44,7 +44,7 @@ jobs:
             arch: x64
           - build: 'aarch64'
             defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=armv8-a'
-            os: ubuntu-22.04-arm
+            os: ubuntu-24.04-arm
             arch: arm64
     runs-on: ${{ matrix.os }}

From dfb3cc9209b511ca047c5c5aca7dbecccf5d3f1b Mon Sep 17 00:00:00 2001
From: Martin Evans
Date: Mon, 21 Apr 2025 14:55:56 +0100
Subject: [PATCH 10/11] Update .github/workflows/compile.yml

Upgraded the Linux AVX512 runner to Ubuntu 24.04
---
 .github/workflows/compile.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
index e91ce7179..b63c4fa85 100644
--- a/.github/workflows/compile.yml
+++ b/.github/workflows/compile.yml
@@ -40,7 +40,7 @@ jobs:
             arch: x64
           - build: 'avx512'
             defines: '-DGGML_AVX512=ON'
-            os: ubuntu-20.04
+            os: ubuntu-24.04
             arch: x64
           - build: 'aarch64'
             defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=armv8-a'
             os: ubuntu-24.04-arm
             arch: arm64

From 9ed73783f7f0bce122756aaa2a92bf13f0917504 Mon Sep 17 00:00:00 2001
From: nipeone
Date: Tue, 6 May 2025 09:55:28 +0800
Subject: [PATCH 11/11] Merge upstream/master and resolve conflicts

---
 LLama.Unittest/LLama.Unittest.csproj | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LLama.Unittest/LLama.Unittest.csproj b/LLama.Unittest/LLama.Unittest.csproj
index 22289124d..6b0e0b8f4 100644
--- a/LLama.Unittest/LLama.Unittest.csproj
+++ b/LLama.Unittest/LLama.Unittest.csproj
@@ -46,7 +46,7 @@
       <DestinationFileName>smollm-360m-instruct-add-basics-q8_0.gguf</DestinationFileName>
-
+      <SourceUrl>https://huggingface.co/gpustack/jina-reranker-v1-tiny-en-GGUF/resolve/main/jina-reranker-v1-tiny-en-FP16.gguf</SourceUrl>
       <DestinationFolder>Models</DestinationFolder>
       <DestinationFileName>jina-reranker-v1-tiny-en-FP16.gguf</DestinationFileName>
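
---

A short usage sketch of the reranking API added in this series, distilled from the unit tests in PATCH 05. This is not part of the patches themselves: the model path and parameter values are illustrative stand-ins for whatever your application uses.

    using System;
    using LLama;
    using LLama.Common;
    using LLama.Native;

    // Load a reranking model. The LLamaReranker constructor requires
    // PoolingType == LLamaPoolingType.Rank and equal batch/ubatch sizes,
    // and it rejects encoder-decoder models.
    var @params = new ModelParams("Models/jina-reranker-v1-tiny-en-FP16.gguf")
    {
        ContextSize = 0,                     // 0 = use the model's native context size
        PoolingType = LLamaPoolingType.Rank,
    };
    using var weights = LLamaWeights.LoadFromFile(@params);
    using var reranker = new LLamaReranker(weights, @params);

    // Each document is tokenized together with the query and scored in a
    // batched pass (split across passes if a batch would exceed the context
    // size); normalize: true maps raw scores through a sigmoid into (0, 1).
    var query = "what is panda?";
    var documents = new[] { "hi", "it's a bear", "a bear species endemic to China" };
    var scores = await reranker.GetRelevanceScores(query, documents, normalize: true);

    for (var i = 0; i < documents.Length; i++)
        Console.WriteLine($"{scores[i]:F4}  {documents[i]}");

Higher scores indicate higher relevance, so the third document should win for this query; `GetRelevanceScoreWithTokenCount` is the single-document variant when the consumed token count is also needed.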