File tree: 2 files changed (+52 −0 lines changed)
# Summary

Used to run the llama.cpp OpenAI-compatible server.

## Usage

```yaml
steps:
  - name: Run llama.cpp server
    uses: neuralmagic/nm-actions/actions/llama-cpp@main
    with:
      port: 8000
      model: "aminkhalafi/Phi-3-mini-4k-instruct-Q4_K_M-GGUF"
      context-size: 2048
```
Original file line number Diff line number Diff line change
---
# Composite action: install llama.cpp via Homebrew and start its
# OpenAI-compatible web server in the background.
name: "Run llama.cpp"
description: "Run llama.cpp OpenAI compatible web server"

inputs:
  port:
    description: "The port of the running service"
    required: false
    # Quoted: action input defaults are strings; avoids YAML int typing.
    default: "8080"
  model:
    description: "The Hugging Face model repository (GGUF)"
    required: false
    default: "aminkhalafi/Phi-3-mini-4k-instruct-Q4_K_M-GGUF"
  context-size:
    description: "The size of the input context (tokens)"
    required: false
    default: "2048"

runs:
  using: "composite"
  steps:
    - name: Install llama.cpp
      id: install
      shell: bash
      run: |
        brew install llama.cpp

    - name: Start llama.cpp web server
      id: start
      shell: bash
      # Fixes: --hf-repo previously received inputs.port instead of
      # inputs.model, and the context flag was misspelled "-ctx-size"
      # (llama-server expects --ctx-size / -c).
      run: |
        llama-server --hf-repo "${{ inputs.model }}" --ctx-size "${{ inputs.context-size }}" --port "${{ inputs.port }}" &

    - name: Wait for llama server to start
      id: wait
      shell: bash
      # Fixed delay; NOTE(review): consider polling the /health endpoint
      # instead of sleeping a fixed 10 seconds.
      run: |
        sleep 10
You can’t perform that action at this time.
0 commit comments