Commit 02bd3a6

llama-cpp OpenAI-compatible service is added
1 parent: 20040cf

2 files changed (+52 lines, −0 lines)

actions/llama-cpp/README.md (+15 lines)
# Summary

Used to run the llama.cpp OpenAI-compatible server.

## Usage

```yaml
steps:
  - name: Run llama.cpp server
    uses: neuralmagic/nm-actions/actions/llama-cpp@main
    with:
      port: 8000
      model: "aminkhalafi/Phi-3-mini-4k-instruct-Q4_K_M-GGUF"
      context-size: 2048
```
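
Once the action has started the server, later steps in the same job can reach it over HTTP. As a rough sketch (not part of this commit), a follow-up step might hit llama.cpp's OpenAI-compatible chat completions endpoint on the port configured above; the step name and prompt are placeholders:

```yaml
# Sketch of a follow-up step, assuming the server from the usage example
# above is listening on localhost:8000 and serves /v1/chat/completions.
  - name: Smoke-test the llama.cpp server
    run: |
      curl -sS http://localhost:8000/v1/chat/completions \
        -H "Content-Type: application/json" \
        -d '{"messages": [{"role": "user", "content": "Say hello"}], "max_tokens": 16}'
```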

actions/llama-cpp/action.yaml (+37 lines)
name: "Run llama.cpp"
description: "Run the llama.cpp OpenAI-compatible web server"

inputs:
  port:
    description: "The port of the running service"
    required: false
    default: 8080
  model:
    description: "The Hugging Face model"
    required: false
    default: "aminkhalafi/Phi-3-mini-4k-instruct-Q4_K_M-GGUF"
  context-size:
    description: "The input context size (tokens)"
    required: false
    default: 2048

runs:
  using: "composite"
  steps:
    - name: Install llama.cpp
      id: install
      shell: bash
      run: |
        brew install llama.cpp

    - name: Start llama.cpp web server
      id: start
      shell: bash
      run: |
        llama-server --hf-repo "${{ inputs.model }}" --ctx-size "${{ inputs.context-size }}" --port "${{ inputs.port }}" &

    - name: Wait for llama server to start
      id: wait
      shell: bash
      run: |
        sleep 10
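
The final step waits a fixed 10 seconds, which may not be long enough on slow runners (and wastes time on fast ones). A possible alternative, sketched below and not part of this commit, is to poll the server until it answers; this assumes the installed llama-server exposes a /health endpoint on the action's configured port:

```yaml
    # Sketch of a replacement for the fixed sleep; assumes llama-server
    # exposes /health on the port passed to the action.
    - name: Wait for llama server to start
      id: wait
      shell: bash
      run: |
        for attempt in $(seq 1 30); do
          if curl -sf "http://localhost:${{ inputs.port }}/health" > /dev/null; then
            echo "llama-server is ready"
            exit 0
          fi
          sleep 2
        done
        echo "llama-server did not become ready in time" >&2
        exit 1
```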
