|
1 | 1 | import asyncio |
2 | | -from concurrent.futures import ThreadPoolExecutor |
3 | 2 | from pathlib import Path |
4 | 3 | from queue import Queue |
5 | 4 | from threading import Event |
@@ -88,9 +87,9 @@ def __init__( |
88 | 87 | self._response_queue = Queue() |
89 | 88 | self.set_result_queue(self._response_queue) |
90 | 89 |
|
91 | | - # Create a thread pool for the fetch_responses_loop_async task to avoid |
92 | | - # being interfered by other tasks such as submit(). |
93 | | - self._fetch_responses_loop_executor = ThreadPoolExecutor(max_workers=1) |
 | 90 | + # Note: we no longer create a persistent ThreadPoolExecutor here,
 | 91 | + # which could leak its worker thread. asyncio.to_thread() instead
 | 92 | + # runs blocking calls on the event loop's default executor.
94 | 93 |
|
95 | 94 | def submit(self, request: GenerationRequest): |
96 | 95 | """ Submits a request to the worker. """ |
@@ -128,11 +127,10 @@ def fetch_responses(self, timeout: Optional[float] = None) -> list: |
128 | 127 |
|
129 | 128 | async def fetch_responses_async(self, |
130 | 129 | timeout: Optional[float] = None) -> list: |
131 | | - # First, await any pending responses without blocking the event loop |
132 | | - loop = asyncio.get_event_loop() |
133 | | - responses = await loop.run_in_executor( |
134 | | - self._fetch_responses_loop_executor, |
135 | | - lambda: self.fetch_responses(timeout=timeout)) |
 | 130 | + # Use asyncio.to_thread so the blocking fetch does not stall the event loop.
 | 131 | + # This mirrors fetch_stats_async and fetch_kv_cache_events_async.
| 132 | + responses = await asyncio.to_thread(self.fetch_responses, |
| 133 | + timeout=timeout) |
136 | 134 | return responses |
137 | 135 |
|
138 | 136 | async def fetch_stats_async(self, timeout: Optional[float] = None) -> list: |
|
0 commit comments