Commit 67d62a5

Partially updated examples.
1 parent e022286 commit 67d62a5

9 files changed: +41, -40 lines changed

examples/llama.android/llama/src/main/cpp/llama-android.cpp

Lines changed: 5 additions & 5 deletions

@@ -53,7 +53,7 @@
 // auto path_to_model = env->GetStringUTFChars(filename, 0);
 // LOGi("Loading model from %s", path_to_model);
 //
-// auto model = llama_load_model_from_file(path_to_model, model_params);
+// auto model = llama_model_load_from_file(path_to_model, model_params);
 // env->ReleaseStringUTFChars(filename, path_to_model);
 //
 // if (!model) {
@@ -91,12 +91,12 @@
 // ctx_params.n_threads = n_threads;
 // ctx_params.n_threads_batch = n_threads;
 //
-// llama_context * context = llama_new_context_with_model(model, ctx_params);
+// llama_context * context = llama_init_from_model(model, ctx_params);
 //
 // if (!context) {
-//     LOGe("llama_new_context_with_model() returned null)");
+//     LOGe("llama_init_from_model() returned null)");
 //     env->ThrowNew(env->FindClass("java/lang/IllegalStateException"),
-//                   "llama_new_context_with_model() returned null)");
+//                   "llama_init_from_model() returned null)");
 //     return 0;
 // }
 //
@@ -374,7 +374,7 @@
 // const auto new_token_id = llama_sample_token_greedy(context, &candidates_p);
 //
 // const auto n_cur = env->CallIntMethod(intvar_ncur, la_int_var_value);
-// if (llama_token_is_eog(model, new_token_id) || n_cur == n_len) {
+// if (llama_vocab_is_eog(model, new_token_id) || n_cur == n_len) {
     // return nullptr;
 // }
 //
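Migration note: these edits track the upstream llama.cpp refactor in which llama_load_model_from_file and llama_new_context_with_model were renamed to llama_model_load_from_file and llama_init_from_model. A minimal standalone sketch of the updated load path, assuming the post-refactor llama.cpp headers (the model path and the cleanup calls are illustrative, not part of this commit):

#include "llama.h"

int main() {
    // Default parameters; override individual fields before loading as needed.
    llama_model_params model_params = llama_model_default_params();
    llama_context_params ctx_params = llama_context_default_params();

    // New name: llama_model_load_from_file (was llama_load_model_from_file).
    llama_model * model = llama_model_load_from_file("model.gguf", model_params);
    if (!model) {
        return 1;
    }

    // New name: llama_init_from_model (was llama_new_context_with_model).
    llama_context * ctx = llama_init_from_model(model, ctx_params);
    if (!ctx) {
        llama_model_free(model);
        return 1;
    }

    llama_free(ctx);
    llama_model_free(model);
    return 0;
}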

examples/nexa-omni-audio/main-encode.cpp

Lines changed: 2 additions & 1 deletion

@@ -2,7 +2,8 @@
 #include "common-nexa.h"
 
 #include "whisper.h"
-#include "grammar-parser.h"
+// #include "grammar-parser.h"
+#include "llama-grammar.h"
 
 #include <cmath>
 #include <fstream>

examples/nexa-omni-audio/omni.cpp

Lines changed: 7 additions & 7 deletions

@@ -628,7 +628,7 @@ static const char * sample(struct common_sampler * ctx_sampling,
     const llama_token id = common_sampler_sample(ctx_sampling, ctx_llama, -1);
     common_sampler_accept(ctx_sampling, id, true);
     static std::string ret;
-    if (llama_token_is_eog(llama_get_model(ctx_llama), id)) {
+    if (llama_vocab_is_eog(llama_model_get_vocab(llama_get_model(ctx_llama)), id)) {
         ret = "</s>";
     } else {
         ret = common_token_to_piece(ctx_llama, id);
@@ -661,7 +661,7 @@ struct omni_context *omni_init_context(omni_context_params &params)
 
     llama_model_params model_params = common_model_params_to_llama(all_params.gpt);
 
-    llama_model *model = llama_load_model_from_file(all_params.gpt.model.c_str(), model_params);
+    llama_model *model = llama_model_load_from_file(all_params.gpt.model.c_str(), model_params);
     if (model == NULL)
     {
         LLAMA_LOG_ERROR("%s: unable to load model\n", __func__);
@@ -671,7 +671,7 @@ struct omni_context *omni_init_context(omni_context_params &params)
     llama_context_params ctx_params = common_context_params_to_llama(all_params.gpt);
     ctx_params.n_ctx = all_params.gpt.n_ctx < 2048 ? 2048 : all_params.gpt.n_ctx; // we need a longer context size to process image embeddings
 
-    llama_context *ctx_llama = llama_new_context_with_model(model, ctx_params);
+    llama_context *ctx_llama = llama_init_from_model(model, ctx_params);
 
     if (ctx_llama == NULL)
     {
@@ -729,7 +729,7 @@ void omni_free(struct omni_context *ctx_omni)
 
 static bool omni_eval_audio_embed(llama_context *ctx_llama, ggml_tensor *audio_embed, int n_batch, int *n_past)
 {
-    int n_embd = llama_n_embd(llama_get_model(ctx_llama));
+    int n_embd = llama_model_n_embd(llama_get_model(ctx_llama));
 
     int n_audio_embed = audio_embed->ne[1];
     GGML_ASSERT(audio_embed->ne[0] == n_embd);
@@ -829,7 +829,7 @@ const char* omni_process_prompt(struct omni_context *ctx_omni, ggml_tensor *audi
 
     LOG("\n");
 
-    struct common_sampler * ctx_sampling = common_sampler_init(ctx_omni->model, params.gpt.sparams);
+    struct common_sampler * ctx_sampling = common_sampler_init(ctx_omni->model, params.gpt.sampling);
     if (!ctx_sampling) {
         fprintf(stderr, "%s: failed to initialize sampling subsystem\n", __func__);
         exit(1);
@@ -890,14 +890,14 @@ struct omni_streaming {
         : ctx_omni_(ctx), params_(params) {
         dec_cnt_ = 0;
         n_past_ = 0;
-        ctx_sampling_ = common_sampler_init(ctx_omni_->model, params_.gpt.sparams);
+        ctx_sampling_ = common_sampler_init(ctx_omni_->model, params_.gpt.sampling);
     };
 
     int32_t sample() {
         llama_token id = common_sampler_sample(ctx_sampling_, ctx_omni_->ctx_llama, -1);
         common_sampler_accept(ctx_sampling_, id, true);
         static std::string ret_str;
-        if (llama_token_is_eog(llama_get_model(ctx_omni_->ctx_llama), id)) {
+        if (llama_vocab_is_eog(llama_model_get_vocab(llama_get_model(ctx_omni_->ctx_llama)), id)) {
             ret_str = "</s>";
         } else {
             ret_str = common_token_to_piece(ctx_omni_->ctx_llama, id);
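The end-of-generation check now goes through the vocabulary object: llama_token_is_eog(model, id) becomes llama_vocab_is_eog(vocab, id), with the vocab obtained via llama_model_get_vocab. A hedged sketch of the pattern repeated in the sample() helpers above, assuming the post-refactor API (the helper name is illustrative):

#include "llama.h"

// True when the sampled token ends generation (EOS/EOT), new-style API.
static bool is_end_of_generation(llama_context * ctx, llama_token id) {
    const llama_model * model = llama_get_model(ctx);
    const llama_vocab * vocab = llama_model_get_vocab(model);
    return llama_vocab_is_eog(vocab, id); // was: llama_token_is_eog(model, id)
}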

examples/nexa-omni-audio/omni.h

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 
 #include "whisper.h"
 #include "llama.h"
-#include "grammar-parser.h"
+// #include "grammar-parser.h"
 #include "common.h"
 #include "common-nexa.h"
 

examples/omni-vlm/omni-vlm-cli.cpp

Lines changed: 7 additions & 7 deletions

@@ -52,7 +52,7 @@ static const char * sample(struct common_sampler * ctx_sampling,
     const llama_token id = common_sampler_sample(ctx_sampling, ctx_llama, -1);
     common_sampler_accept(ctx_sampling, id, true);
     static std::string ret;
-    if (llama_token_is_eog(llama_get_model(ctx_llama), id)) {
+    if (llama_vocab_is_eog(llama_model_get_vocab(llama_get_model(ctx_llama)), id)) {
         ret = "</s>";
     } else {
         ret = common_token_to_piece(ctx_llama, id);
@@ -167,10 +167,10 @@ static void process_prompt(struct omnivlm_context * ctx_omnivlm, struct omni_ima
 
     LOG("\n");
 
-    params->sparams.temp = 0.0f;
-    params->sparams.top_k = 1;
-    params->sparams.top_p = 1.0f;
-    struct common_sampler * ctx_sampling = common_sampler_init(ctx_omnivlm->model, params->sparams);
+    params->sampling.temp = 0.0f;
+    params->sampling.top_k = 1;
+    params->sampling.top_p = 1.0f;
+    struct common_sampler * ctx_sampling = common_sampler_init(ctx_omnivlm->model, params->sampling);
     if (!ctx_sampling) {
         LOG_ERR("%s: failed to initialize sampling subsystem\n", __func__);
         exit(1);
@@ -201,7 +201,7 @@ static struct llama_model * omnivlm_init(common_params * params) {
 
     llama_model_params model_params = common_model_params_to_llama(*params);
 
-    llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
@@ -223,7 +223,7 @@ static struct omnivlm_context * omnivlm_init_context(common_params * params, lla
     llama_context_params ctx_params = common_context_params_to_llama(*params);
     ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
 
-    llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);
+    llama_context * ctx_llama = llama_init_from_model(model, ctx_params);
 
     if (ctx_llama == NULL) {
         LOG_ERR("%s: failed to create the llama_context\n" , __func__);
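The sparams -> sampling edits track the rename of the sampling-parameters member of common_params in upstream common (the struct itself became common_params_sampling). A sketch of the near-greedy sampler setup shown above, under that assumption; the helper name is illustrative:

#include "common.h"
#include "sampling.h"

// Build a near-greedy sampler using the renamed `sampling` member (was `sparams`).
static common_sampler * make_greedy_sampler(llama_model * model, common_params & params) {
    params.sampling.temp  = 0.0f; // temperature 0: pick the most likely token
    params.sampling.top_k = 1;    // keep only the single best candidate
    params.sampling.top_p = 1.0f; // disable nucleus filtering
    return common_sampler_init(model, params.sampling);
}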

examples/omni-vlm/omni-vlm-wrapper.cpp

Lines changed: 10 additions & 10 deletions

@@ -47,15 +47,15 @@ struct omni_streaming_sample {
         :image_(image) {
         n_past_ = 0;
         dec_cnt_ = 0;
-        params.sparams.top_k = 1;
-        params.sparams.top_p = 1.0f;
-        ctx_sampling_ = common_sampler_init(model, params.sparams);
+        params.sampling.top_k = 1;
+        params.sampling.top_p = 1.0f;
+        ctx_sampling_ = common_sampler_init(model, params.sampling);
     }
 
     int32_t sample() {
         const llama_token id = common_sampler_sample(ctx_sampling_, ctx_omnivlm->ctx_llama, -1);
         common_sampler_accept(ctx_sampling_, id, true);
-        if (llama_token_is_eog(llama_get_model(ctx_omnivlm->ctx_llama), id)) {
+        if (llama_vocab_is_eog(llama_model_get_vocab(llama_get_model(ctx_omnivlm->ctx_llama)), id)) {
             ret_str_ = "</s>";
         } else {
             ret_str_ = common_token_to_piece(ctx_omnivlm->ctx_llama, id);
@@ -97,7 +97,7 @@ static struct llama_model * omnivlm_init(common_params * params) {
 
     llama_model_params model_params = common_model_params_to_llama(*params);
 
-    llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
@@ -120,7 +120,7 @@ static struct omnivlm_context * omnivlm_init_context(common_params * params, lla
     llama_context_params ctx_params = common_context_params_to_llama(*params);
     ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
 
-    llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);
+    llama_context * ctx_llama = llama_init_from_model(model, ctx_params);
 
     if (ctx_llama == NULL) {
         LOG_ERR("%s: failed to create the llama_context\n" , __func__);
@@ -170,7 +170,7 @@ static const char * sample(struct common_sampler * smpl,
     const llama_token id = common_sampler_sample(smpl, ctx_llama, -1);
     common_sampler_accept(smpl, id, true);
     static std::string ret;
-    if (llama_token_is_eog(llama_get_model(ctx_llama), id)) {
+    if (llama_vocab_is_eog(llama_model_get_vocab(llama_get_model(ctx_llama)), id)) {
         ret = "</s>";
     } else {
         ret = common_token_to_piece(ctx_llama, id);
@@ -206,8 +206,8 @@ static const char* process_prompt(struct omnivlm_context * ctx_omnivlm, struct o
         }
     }
 
-    params->sparams.top_k = 1;
-    params->sparams.top_p = 1.0f;
+    params->sampling.top_k = 1;
+    params->sampling.top_p = 1.0f;
 
     eval_string(ctx_omnivlm->ctx_llama, system_prompt.c_str(), params->n_batch, &n_past, true);
     omnivlm_eval_image_embed(ctx_omnivlm->ctx_llama, image_embed, params->n_batch, &n_past);
@@ -217,7 +217,7 @@ static const char* process_prompt(struct omnivlm_context * ctx_omnivlm, struct o
 
     LOG("\n");
 
-    struct common_sampler * smpl = common_sampler_init(ctx_omnivlm->model, params->sparams);
+    struct common_sampler * smpl = common_sampler_init(ctx_omnivlm->model, params->sampling);
     if (!smpl) {
         LOG_ERR("%s: failed to initialize sampling subsystem\n", __func__);
         exit(1);

examples/qwen2-audio/main-encode.cpp

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 #include "common-nexa.h"
 
 #include "whisper.h"
-#include "grammar-parser.h"
+// #include "grammar-parser.h"
 
 #include <cmath>
 #include <fstream>

examples/qwen2-audio/qwen2.cpp

Lines changed: 7 additions & 7 deletions

@@ -626,7 +626,7 @@ static const char * sample(struct common_sampler * ctx_sampling,
     const llama_token id = common_sampler_sample(ctx_sampling, ctx_llama, -1);
     common_sampler_accept(ctx_sampling, id, true);
     static std::string ret;
-    if (llama_token_is_eog(llama_get_model(ctx_llama), id)) {
+    if (llama_vocab_is_eog(llama_model_get_vocab(llama_get_model(ctx_llama)), id)) {
         ret = "</s>";
     } else {
         ret = common_token_to_piece(ctx_llama, id);
@@ -659,7 +659,7 @@ struct omni_context *omni_init_context(omni_context_params &params)
 
     llama_model_params model_params = common_model_params_to_llama(all_params.gpt);
 
-    llama_model *model = llama_load_model_from_file(all_params.gpt.model.c_str(), model_params);
+    llama_model *model = llama_model_load_from_file(all_params.gpt.model.c_str(), model_params);
     if (model == NULL)
     {
         LLAMA_LOG_ERROR("%s: unable to load model\n", __func__);
@@ -669,7 +669,7 @@ struct omni_context *omni_init_context(omni_context_params &params)
     llama_context_params ctx_params = common_context_params_to_llama(all_params.gpt);
     ctx_params.n_ctx = all_params.gpt.n_ctx < 2048 ? 2048 : all_params.gpt.n_ctx; // we need a longer context size to process image embeddings
 
-    llama_context *ctx_llama = llama_new_context_with_model(model, ctx_params);
+    llama_context *ctx_llama = llama_init_from_model(model, ctx_params);
 
     if (ctx_llama == NULL)
     {
@@ -730,7 +730,7 @@ void omni_free(struct omni_context *ctx_omni)
 
 static bool omni_eval_audio_embed(llama_context *ctx_llama, ggml_tensor *audio_embed, int n_batch, int *n_past)
 {
-    int n_embd = llama_n_embd(llama_get_model(ctx_llama));
+    int n_embd = llama_model_n_embd(llama_get_model(ctx_llama));
 
     int n_audio_embed = audio_embed->ne[1];
     GGML_ASSERT(audio_embed->ne[0] == n_embd);
@@ -839,7 +839,7 @@ const char* omni_process_prompt(struct omni_context *ctx_omni, ggml_tensor *audi
 
     LOG("\n");
 
-    struct common_sampler * ctx_sampling = common_sampler_init(ctx_omni->model, params.gpt.sparams);
+    struct common_sampler * ctx_sampling = common_sampler_init(ctx_omni->model, params.gpt.sampling);
     if (!ctx_sampling) {
         fprintf(stderr, "%s: failed to initialize sampling subsystem\n", __func__);
         exit(1);
@@ -901,14 +901,14 @@ struct omni_streaming {
         : ctx_omni_(ctx), params_(params) {
         dec_cnt_ = 0;
         n_past_ = 0;
-        ctx_sampling_ = common_sampler_init(ctx_omni_->model, params_.gpt.sparams);
+        ctx_sampling_ = common_sampler_init(ctx_omni_->model, params_.gpt.sampling);
     };
 
     int32_t sample() {
         llama_token id = common_sampler_sample(ctx_sampling_, ctx_omni_->ctx_llama, -1);
         common_sampler_accept(ctx_sampling_, id, true);
         static std::string ret_str;
-        if (llama_token_is_eog(llama_get_model(ctx_omni_->ctx_llama), id)) {
+        if (llama_vocab_is_eog(llama_model_get_vocab(llama_get_model(ctx_omni_->ctx_llama)), id)) {
            ret_str = "</s>";
         } else {
             ret_str = common_token_to_piece(ctx_omni_->ctx_llama, id);
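Similarly, llama_n_embd(model) is now llama_model_n_embd(model). A minimal sketch of the embedding-size check used by omni_eval_audio_embed above, assuming the post-refactor API and the tensor layout the originals assert (ne[0] = embedding width, ne[1] = number of positions); the helper name is illustrative:

#include "ggml.h"
#include "llama.h"

// Check that an audio embedding tensor matches the model's embedding width.
static bool audio_embed_fits(llama_context * ctx, const ggml_tensor * audio_embed) {
    const int n_embd = llama_model_n_embd(llama_get_model(ctx)); // was: llama_n_embd(...)
    return audio_embed->ne[0] == n_embd;
}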

examples/qwen2-audio/qwen2.h

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 
 #include "whisper.h"
 #include "llama.h"
-#include "grammar-parser.h"
+// #include "grammar-parser.h"
 #include "common.h"
 #include "common-nexa.h"
 
