Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 7f5a20a

Browse files
authored
Merge pull request #434 from janhq/chore/request-cancel-on-done
chore: release request upon completion
2 parents e067514 + f6fa86d commit 7f5a20a

File tree

1 file changed

+6
-9
lines changed

1 file changed

+6
-9
lines changed

controllers/llamaCPP.cc

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -338,12 +338,10 @@ void llamaCPP::inferenceImpl(
338338

339339
if (!pBuffer) {
340340
LOG_INFO << "Connection closed or buffer is null. Reset context";
341-
state->instance->llama.request_cancel(state->task_id);
342341
state->inferenceStatus = FINISHED;
343342
return 0;
344343
}
345344

346-
347345
task_result result = state->instance->llama.next_result(state->task_id);
348346
if (!result.error) {
349347
const std::string to_send = result.result_json["content"];
@@ -367,7 +365,6 @@ void llamaCPP::inferenceImpl(
367365
std::size_t nRead = std::min(str.size(), nBuffSize);
368366
memcpy(pBuffer, str.data(), nRead);
369367
LOG_INFO << "reached result stop";
370-
state->instance->llama.request_cancel(state->task_id);
371368
state->inferenceStatus = FINISHED;
372369
}
373370

@@ -401,11 +398,13 @@ void llamaCPP::inferenceImpl(
401398
if(state->inferenceStatus == PENDING) {
402399
retries += 1;
403400
}
404-
LOG_INFO << "Wait for task to be released:" << state->task_id;
405-
std::this_thread::sleep_for(std::chrono::milliseconds(300));
401+
if(state->inferenceStatus != RUNNING)
402+
LOG_INFO << "Wait for task to be released:" << state->task_id;
403+
std::this_thread::sleep_for(std::chrono::milliseconds(100));
406404
}
405+
// Request completed, release it
406+
state->instance->llama.request_cancel(state->task_id);
407407
});
408-
return;
409408
} else {
410409
Json::Value respData;
411410
auto resp = nitro_utils::nitroHttpResponse();
@@ -424,11 +423,9 @@ void llamaCPP::inferenceImpl(
424423
prompt_tokens, predicted_tokens);
425424
resp->setBody(full_return);
426425
} else {
427-
resp->setBody("internal error during inference");
428-
return;
426+
resp->setBody("Internal error during inference");
429427
}
430428
callback(resp);
431-
return;
432429
}
433430
}
434431
}

0 commit comments

Comments
 (0)