
Commit 0907efa

Implement llama-pull tool
Complete llama-pull tool with documentation

Signed-off-by: Eric Curtin <[email protected]>
1 parent 4e0388a commit 0907efa

5 files changed: +123 −0 lines changed


common/arg.cpp

Lines changed: 6 additions & 0 deletions
```diff
@@ -1393,6 +1393,11 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n");
     }
 
+    // Both cannot be specified at the same time
+    if (!params.model.hf_repo.empty() && !params.model.docker_repo.empty()) {
+        throw std::invalid_argument("error: cannot specify both -hf and -dr options\n");
+    }
+
     // handle model and download
     {
         auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline);
@@ -1727,6 +1732,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params &) {
             fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
             fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
+            fprintf(stderr, "model cache path: %s\n", fs_get_cache_directory().c_str());
             exit(0);
         }
     ));
```
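
The new check makes the two download sources mutually exclusive at argument-parsing time. A minimal sketch of the resulting behaviour, using an illustrative model reference (the error text is the one thrown in the diff above):

```bash
# -hf and -dr can no longer be combined in a single invocation
llama-pull -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M -dr gemma3
# => error: cannot specify both -hf and -dr options
```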

tools/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
```diff
@@ -18,6 +18,7 @@ else()
     add_subdirectory(gguf-split)
     add_subdirectory(imatrix)
     add_subdirectory(llama-bench)
+    add_subdirectory(pull)
     add_subdirectory(main)
     add_subdirectory(perplexity)
     add_subdirectory(quantize)
```

tools/pull/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
```cmake
set(TARGET llama-pull)
add_executable(${TARGET} pull.cpp)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

if(LLAMA_TOOLS_INSTALL)
    install(TARGETS ${TARGET} RUNTIME)
endif()
```
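
A minimal build sketch, assuming the usual llama.cpp CMake workflow from the repository root; the target name follows the `TARGET` variable set above:

```bash
# Configure; curl-based downloads are enabled by default (LLAMA_USE_CURL=ON)
cmake -B build
# Build only the new tool
cmake --build build --target llama-pull
```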

tools/pull/README.md

Lines changed: 43 additions & 0 deletions
````markdown
# llama-pull - Model Download Tool

A command-line tool for downloading AI models from HuggingFace and [Docker Hub](https://hub.docker.com/u/ai) for use with llama.cpp.

## Usage

```bash
# Download from HuggingFace
llama-pull -hf <user>/<model>[:<quant>]

# Download from Docker Hub
llama-pull -dr [<repo>/]<model>[:<quant>]
```

## Options

- `-hf, --hf-repo REPO` - Download model from HuggingFace repository
- `-dr, --docker-repo REPO` - Download model from Docker Hub
- `--hf-token TOKEN` - HuggingFace token for private repositories
- `-h, --help` - Show help message

## Examples

```bash
# Download a HuggingFace model
llama-pull -hf microsoft/DialoGPT-medium

# Download a Docker model (ai/ repo is default)
llama-pull -dr gemma3

# Download with specific quantization
llama-pull -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M
```

## Model Storage

Downloaded models are stored in the standard llama.cpp cache directory:

- Linux: `~/.cache/llama.cpp/`
- macOS: `~/Library/Caches/llama.cpp/`

The models can then be used with other llama.cpp tools.

## Requirements

- Built with `LLAMA_USE_CURL=ON` (default) for download functionality
````
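
A short sketch of the workflow the README describes, with an illustrative model reference: a model pulled once lands in the cache directory listed above, and passing the same reference to another llama.cpp tool should then resolve to the cached file rather than triggering a fresh download.

```bash
# Fetch the model into the llama.cpp cache
llama-pull -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M

# Reuse the cached file with another llama.cpp tool
llama-cli -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M -p "Hello"
```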

tools/pull/pull.cpp

Lines changed: 65 additions & 0 deletions
```cpp
#include "arg.h"
#include "common.h"
#include "log.h"

#include <cstdio>
#include <string>

static void print_usage(int, char ** argv) {
    LOG("Usage: %s [options]\n", argv[0]);
    LOG("\n");
    LOG("Download models from HuggingFace or Docker Hub\n");
    LOG("\n");
    LOG("Options:\n");
    LOG("  -h, --help                 show this help message and exit\n");
    LOG("  -hf, -hfr, --hf-repo REPO  download model from HuggingFace repo\n");
    LOG("                             format: <user>/<model>[:<quant>]\n");
    LOG("                             example: microsoft/DialoGPT-medium\n");
    LOG("  -dr, --docker-repo REPO    download model from Docker Hub\n");
    LOG("                             format: [<repo>/]<model>[:<quant>]\n");
    LOG("                             example: gemma3\n");
    LOG("  --hf-token TOKEN           HuggingFace token for private repos\n");
    LOG("\n");
    LOG("Examples:\n");
    LOG("  %s -hf microsoft/DialoGPT-medium\n", argv[0]);
    LOG("  %s -dr gemma3\n", argv[0]);
    LOG("  %s -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M\n", argv[0]);
    LOG("\n");
}

int main(int argc, char ** argv) {
    common_params params;

    // Parse command line arguments
    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON, print_usage)) {
        print_usage(argc, argv);
        return 1;
    }

    // Check that a download source was provided
    if (params.model.hf_repo.empty() && params.model.docker_repo.empty()) {
        LOG_ERR("error: must specify either -hf <repo> or -dr <repo>\n");
        print_usage(argc, argv);
        return 1;
    }

    LOG_INF("llama-pull: downloading model...\n");

    try {
        // Use the existing model handling logic which downloads the model
        common_init_result llama_init = common_init_from_params(params);

        if (llama_init.model != nullptr) {
            LOG_INF("Model downloaded and loaded successfully to: %s\n", params.model.path.c_str());

            // We only want to download, not keep the model loaded
            // The download happens during common_init_from_params
        } else {
            LOG_ERR("Failed to download or load model\n");
            return 1;
        }
    } catch (const std::exception & e) {
        LOG_ERR("Error: %s\n", e.what());
        return 1;
    }

    return 0;
}
```
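
For reference, invoking the tool with no model source takes the error path in `main` above: the error is logged, the usage text is printed, and the process exits with a non-zero status.

```bash
llama-pull
# => error: must specify either -hf <repo> or -dr <repo>
#    (followed by the usage text from print_usage, exit status 1)
```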
