Skip to content

[build] Monkeypatch gen_rst to call inside subprocess #3355

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .jenkins/validate_tutorials_built.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
"intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.
"advanced_source/semi_structured_sparse", # reenable after 3303 is fixed.
"intermediate_source/torchrec_intro_tutorial", # reenable after 3302 is fixe
"intermediate_source/memory_format_tutorial", # causes other tutorials like torch_logs fail. "state" issue, reseting dynamo didn't help
]

def tutorial_source_dirs() -> List[Path]:
Expand Down
57 changes: 40 additions & 17 deletions conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@
sys.path.insert(0, os.path.abspath('./.jenkins'))
import pytorch_sphinx_theme
import torch
import numpy
import gc
import glob
import random
import shutil
Expand All @@ -49,6 +47,46 @@
pio.renderers.default = 'sphinx_gallery'


import sphinx_gallery.gen_rst
import multiprocessing

# Monkey patch sphinx gallery to run each example in an isolated process so that
# we don't need to worry about examples changing global state.
#
# Alt option 1: Parallelism was added to sphinx gallery (in a later version
# that we are not using yet) using joblib, but it seems to result in errors for
# us. It also has no effect if you set parallel = 1 (it will not run each file
# one at a time in its own process), so you need parallel >= 2, and there may
# be tutorials that cannot be run in parallel.
#
# Alt option 2: Run sphinx gallery once per file (similar to how we shard in CI
# but with shard sizes of 1). However, each sphinx gallery run has a ~5min
# overhead, so the entire suite would take ~2x as long.
def call_fn(func, args, kwargs, result_queue):
    """Run ``func(*args, **kwargs)`` and report the outcome on ``result_queue``.

    Used as the target of a child process, so the outcome is handed back
    through a queue instead of being returned.

    Args:
        func: Callable to invoke.
        args: Positional arguments for ``func``.
        kwargs: Keyword arguments for ``func``.
        result_queue: Any object with a ``put`` method. Exactly one tuple is
            put on it: ``(True, return_value)`` on success, or
            ``(False, formatted_traceback)`` if ``func`` raised an
            ``Exception``.
    """
    import traceback

    try:
        result = func(*args, **kwargs)
    except Exception:
        # Send the full formatted traceback rather than str(e): str(e) drops
        # the exception type and the failing location, and is empty for
        # exceptions raised without a message.
        result_queue.put((False, traceback.format_exc()))
    else:
        result_queue.put((True, result))

def call_in_subprocess(func):
    """Return a wrapper that runs ``func`` in a separate process on every call.

    Each call starts a ``multiprocessing.Process`` that executes ``func`` via
    ``call_fn`` and sends the outcome back through a ``multiprocessing.Queue``.

    Args:
        func: Callable to isolate. Its arguments and return value travel
            between processes, so they must be transferable by
            ``multiprocessing``.

    Returns:
        A callable with the same call signature as ``func``.

    Raises:
        RuntimeError: From the wrapper, if ``func`` raised in the child
            process, or if the child exited without reporting a result.
    """
    import functools
    import queue

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        result_queue = multiprocessing.Queue()
        p = multiprocessing.Process(
            target=call_fn,
            args=(func, args, kwargs, result_queue),
        )
        p.start()
        try:
            # Read the result BEFORE joining: a child that has put data on a
            # multiprocessing.Queue may not terminate until that data has been
            # consumed, so join-then-get can deadlock.
            outcome = None
            while outcome is None:
                # Sample liveness before waiting. If the child was already
                # gone and the queue is still empty after the wait, nothing
                # will ever arrive (e.g. the child crashed or called exit).
                was_alive = p.is_alive()
                try:
                    outcome = result_queue.get(timeout=1)
                except queue.Empty:
                    if not was_alive:
                        raise RuntimeError(
                            "Subprocess exited with code "
                            f"{p.exitcode} without returning a result"
                        ) from None
        finally:
            p.join()

        success, result = outcome
        if success:
            return result
        raise RuntimeError(f"Error in subprocess: {result}")

    return wrapper

# Replace sphinx gallery's per-example entry point with the subprocess-backed
# wrapper so every example file is generated in its own process.
sphinx_gallery.gen_rst.generate_file_rst = call_in_subprocess(
    sphinx_gallery.gen_rst.generate_file_rst
)

try:
import torchvision
except ImportError:
Expand Down Expand Up @@ -97,20 +135,6 @@

# -- Sphinx-gallery configuration --------------------------------------------

def reset_seeds(gallery_conf, fname):
torch.cuda.empty_cache()
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch._dynamo.reset()
torch._inductor.config.force_disable_caches = True
torch.manual_seed(42)
torch.set_default_device(None)
random.seed(10)
numpy.random.seed(10)
torch.set_grad_enabled(True)

gc.collect()

sphinx_gallery_conf = {
'examples_dirs': ['beginner_source', 'intermediate_source',
'advanced_source', 'recipes_source', 'prototype_source'],
Expand All @@ -121,7 +145,6 @@ def reset_seeds(gallery_conf, fname):
'first_notebook_cell': ("# For tips on running notebooks in Google Colab, see\n"
"# https://pytorch.org/tutorials/beginner/colab\n"
"%matplotlib inline"),
'reset_modules': (reset_seeds),
'ignore_pattern': r'_torch_export_nightly_tutorial.py',
'pypandoc': {'extra_args': ['--mathjax', '--toc'],
'filters': ['.jenkins/custom_pandoc_filter.py'],
Expand Down