Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 0cc8cf5

Browse files
authored
Merge pull request #392 from janhq/391-bug-stream-toggle-off-takes-forever-to-respond
391 bug stream toggle off takes forever to respond
2 parents d830ad3 + 6040177 commit 0cc8cf5

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

controllers/llamaCPP.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ std::shared_ptr<inferenceState> create_inference_state(llamaCPP *instance) {
2222
// --------------------------------------------
2323

2424
// Function to check if the model is loaded
25-
void check_model_loaded(llama_server_context &llama, const HttpRequestPtr &req,
26-
std::function<void(const HttpResponsePtr &)> &callback) {
25+
void check_model_loaded(
26+
llama_server_context &llama, const HttpRequestPtr &req,
27+
std::function<void(const HttpResponsePtr &)> &callback) {
2728
if (!llama.model_loaded_external) {
2829
Json::Value jsonResp;
2930
jsonResp["message"] =
@@ -299,13 +300,9 @@ void llamaCPP::chatCompletion(
299300
LOG_INFO << "Current completion text";
300301
LOG_INFO << formatted_output;
301302
#endif
302-
int task_id;
303-
304-
LOG_INFO << "Resolved request for task_id:" << task_id;
305303

306304
if (is_streamed) {
307305
auto state = create_inference_state(this);
308-
state->task_id = task_id;
309306
auto chunked_content_provider =
310307
[state, data](char *pBuffer, std::size_t nBuffSize) -> std::size_t {
311308
if (!state->is_streaming) {
@@ -386,9 +383,12 @@ void llamaCPP::chatCompletion(
386383
} else {
387384
Json::Value respData;
388385
auto resp = nitro_utils::nitroHttpResponse();
386+
int task_id = llama.request_completion(data, false, false, -1);
387+
LOG_INFO << "sent the non-stream request, waiting for response";
389388
if (!json_value(data, "stream", false)) {
390389
std::string completion_text;
391390
task_result result = llama.next_result(task_id);
391+
LOG_INFO << "Here is the result:" << result.error;
392392
if (!result.error && result.stop) {
393393
int prompt_tokens = result.result_json["tokens_evaluated"];
394394
int predicted_tokens = result.result_json["tokens_predicted"];

0 commit comments

Comments
 (0)