Skip to content

Commit 001bcee

Browse files
Fix log file mixing issue by using ProcessPoolExecutor
Fix log file mixing issue by using ProcessPoolExecutor.

The build workflow was experiencing log file corruption and I/O errors due to concurrent builds writing to the wrong log files. This was caused by using ThreadPoolExecutor with contextlib.redirect_stdout/stderr, which only provides thread-local redirection of Python-level writes.

The SDK's build() function spawns subprocesses and uses logger.info()/warning() to output build logs. Logger handlers write to process-wide file descriptors, not thread-local redirected streams, causing output from concurrent threads to:

- Write to the wrong log files
- Attempt writing to closed file handles
- Result in ValueError('I/O operation on closed file.')

Solution: Replace ThreadPoolExecutor with ProcessPoolExecutor to provide complete process-level isolation with separate stdout/stderr/logging per build. The additional overhead is negligible compared to Docker build time.

Changes:

- Import ProcessPoolExecutor instead of ThreadPoolExecutor
- Move build_one_fn to module level (_build_with_logging) for pickle support
- Update executor initialization to use ProcessPoolExecutor
- Add explanatory comments about isolation requirements

Co-authored-by: openhands <[email protected]>
1 parent cdd7200 commit 001bcee

File tree

1 file changed

+19
-11
lines changed

1 file changed

+19
-11
lines changed

benchmarks/swe_bench/build_images.py

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
import contextlib
1313
import io
1414
import sys
15-
from concurrent.futures import ThreadPoolExecutor, as_completed
15+
from concurrent.futures import ProcessPoolExecutor, as_completed
1616
from datetime import UTC, datetime
1717
from pathlib import Path
1818
from threading import Lock
@@ -162,6 +162,19 @@ def _default_build_output_dir(
162162
return root
163163

164164

165+
def _build_with_logging(
166+
base: str, log_dir: Path, args: argparse.Namespace
167+
) -> BuildOutput:
168+
"""
169+
Module-level function for building a single image with output capture.
170+
Must be at module level to be picklable for ProcessPoolExecutor.
171+
"""
172+
with capture_output(base, log_dir) as log_path:
173+
result = build_one(base, args)
174+
result.log_path = str(log_path)
175+
return result
176+
177+
165178
def _update_pbar(
166179
pbar: tqdm,
167180
successes: int,
@@ -191,12 +204,6 @@ def main(argv: list[str]) -> int:
191204
manifest_path = BUILD_DIR / "manifest.jsonl"
192205
manifest_path.parent.mkdir(parents=True, exist_ok=True)
193206

194-
def build_one_fn(base: str, args) -> BuildOutput:
195-
with capture_output(base, BUILD_LOG_DIR) as log_path:
196-
result = build_one(base, args)
197-
result.log_path = str(log_path)
198-
return result
199-
200207
if args.dry_run:
201208
print("\n".join(bases))
202209
return 0
@@ -212,13 +219,14 @@ def build_one_fn(base: str, args) -> BuildOutput:
212219
):
213220
_update_pbar(pbar, successes, failures, 0, None, "Queueing")
214221

215-
# Single unified path: ThreadPoolExecutor( max_workers = args.max_workers ),
216-
# even if it's 1
217-
with ThreadPoolExecutor(max_workers=args.max_workers) as ex:
222+
# Single unified path: ProcessPoolExecutor( max_workers = args.max_workers ),
223+
# even if it's 1. Using processes instead of threads ensures proper isolation
224+
# of stdout/stderr and logging handlers, preventing output mixing between builds.
225+
with ProcessPoolExecutor(max_workers=args.max_workers) as ex:
218226
futures = {}
219227
for base in bases:
220228
in_progress.add(base)
221-
fut = ex.submit(build_one_fn, base, args)
229+
fut = ex.submit(_build_with_logging, base, BUILD_LOG_DIR, args)
222230
futures[fut] = base
223231

224232
_update_pbar(

0 commit comments

Comments (0)