diff --git a/tensorrt_llm/executor/ray_executor.py b/tensorrt_llm/executor/ray_executor.py index 5d87fdc9bfc..e0c810d7565 100644 --- a/tensorrt_llm/executor/ray_executor.py +++ b/tensorrt_llm/executor/ray_executor.py @@ -208,7 +208,7 @@ def submit(self, request: GenerationRequest) -> GenerationResult: self.call_all_ray_workers("enqueue_request", leader_only=True, request=request, - async_call=False, + async_call=True, result_wait_queue=result.queue) return result