diff --git a/tests/python/conftest.py b/tests/python/conftest.py
new file mode 100644
index 00000000..e9bfae4c
--- /dev/null
+++ b/tests/python/conftest.py
@@ -0,0 +1,12 @@
+import multiprocessing
+import pytest
+
+
+# External libraries called during unit tests may set the multiprocessing start
+# method to "fork", which is not compatible with runtime assertions that it is
+# set to "spawn". Enforce "spawn" once per session, before any test executes;
+# force=True overrides an already-set start method instead of raising RuntimeError.
+@pytest.fixture(scope="session", autouse=True)
+def initialize_data_environment():
+    """Force the multiprocessing start method to "spawn" for the whole test session."""
+    multiprocessing.set_start_method("spawn", force=True)
diff --git a/tests/python/test_code_composition.py b/tests/python/test_code_composition.py
index f40bea49..5b7b5308 100644
--- a/tests/python/test_code_composition.py
+++ b/tests/python/test_code_composition.py
@@ -1,4 +1,3 @@
-import multiprocessing
 from unittest import TestCase
 
 from dolma.core.data_types import Document
@@ -40,11 +39,6 @@ def baz():
 
 class TestDolmaCodeProseCompositionClassifier(TestCase):
     def setUp(self) -> None:
-        try:
-            multiprocessing.set_start_method("spawn")
-        except Exception:
-            pass
-
         self.code_composition_tagger = CodeProseCompositionClassifier()
 
     def test_prose_text(self):