Handle exhaustion of the XML event stream in ElementIterator (PEP 479).

ElementIterator.__iter__ and ElementIterator.complete stop cleanly when
next(self.pointer) raises StopIteration instead of letting it propagate.
A new test module iterates Dump objects built from single-page, multi-page
and page-less dumps.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Blank context lines in the element_iterator.py hunks and the XML markup in
the test fixtures were reconstructed to fit the hunk line counts
(7 -> 12, 7 -> 12, 0 -> 202); confirm against the original commit.
NOTE(review): swallowing StopIteration also hides a truncated input stream;
confirm that silent completion is the intended behaviour for that case.

diff --git a/mwxml/element_iterator.py b/mwxml/element_iterator.py
index 80b5486..3565a94 100644
--- a/mwxml/element_iterator.py
+++ b/mwxml/element_iterator.py
@@ -55,7 +55,12 @@ def __init__(self, element, pointer):
 
     def __iter__(self):
         while not self.done and self.pointer.depth() > self.depth:
-            event, element = next(self.pointer)
+            try:
+                event, element = next(self.pointer)
+            except StopIteration:
+                # Stream exhausted - this is normal completion
+                # PEP 479: Catch StopIteration to prevent it from escaping the generator
+                break
 
             if event == "start":
                 sub_iterator = ElementIterator(element, self.pointer)
@@ -69,7 +74,12 @@ def __iter__(self):
 
     def complete(self):
         while not self.done and self.pointer.depth() > self.depth:
-            event, element = next(self.pointer)
+            try:
+                event, element = next(self.pointer)
+            except StopIteration:
+                # Stream exhausted - this is normal completion
+                # PEP 479: StopIteration must not leak into a calling generator
+                break
             if self.pointer.depth() > self.depth:
                 element.clear()
 
diff --git a/mwxml/iteration/tests/test_stopiteration_bug.py b/mwxml/iteration/tests/test_stopiteration_bug.py
new file mode 100644
index 0000000..7341b2f
--- /dev/null
+++ b/mwxml/iteration/tests/test_stopiteration_bug.py
@@ -0,0 +1,202 @@
+"""
+Test suite for StopIteration bug fix (PEP 479 compatibility).
+
+This module tests that the mwxml library properly handles StopIteration
+exceptions in Python 3.7+ where PEP 479 converts StopIteration raised
+inside generators to RuntimeError.
+"""
+
+import io
+import pytest
+import sys
+
+from ..dump import Dump
+
+
+# Sample XML with valid MediaWiki structure
+MINIMAL_XML = """
+<mediawiki>
+  <siteinfo>
+    <sitename>Wikipedia</sitename>
+    <dbname>enwiki</dbname>
+  </siteinfo>
+  <page>
+    <title>Test Page</title>
+    <ns>0</ns>
+    <id>1</id>
+    <revision>
+      <id>100</id>
+      <timestamp>2021-01-01T00:00:00Z</timestamp>
+      <text>Test content</text>
+    </revision>
+  </page>
+</mediawiki>
+"""
+
+MULTI_PAGE_XML = """
+<mediawiki>
+  <siteinfo>
+    <sitename>Wikipedia</sitename>
+    <dbname>enwiki</dbname>
+  </siteinfo>
+  <page>
+    <title>Page 1</title>
+    <ns>0</ns>
+    <id>1</id>
+    <revision>
+      <id>100</id>
+      <timestamp>2021-01-01T00:00:00Z</timestamp>
+      <text>Content 1</text>
+    </revision>
+  </page>
+  <page>
+    <title>Page 2</title>
+    <ns>0</ns>
+    <id>2</id>
+    <revision>
+      <id>200</id>
+      <timestamp>2021-01-02T00:00:00Z</timestamp>
+      <text>Content 2</text>
+    </revision>
+  </page>
+</mediawiki>
+"""
+
+
+@pytest.mark.skipif(sys.version_info < (3, 7),
+                    reason="Bug only affects Python 3.7+")
+def test_stopiteration_bug_reproduction():
+    """
+    Reproduce the StopIteration RuntimeError bug in Python 3.7+.
+
+    This test demonstrates the bug that occurs when the XML stream is
+    exhausted and StopIteration propagates through a generator, which
+    PEP 479 converts to RuntimeError.
+
+    NOTE: This test is expected to FAIL before the fix is applied and
+    PASS after the fix is applied.
+    """
+    dump = Dump.from_file(io.StringIO(MINIMAL_XML))
+
+    # Before the fix, this should raise RuntimeError in Python 3.7+
+    # After the fix, this should complete normally
+    try:
+        pages = list(dump)
+        # If we get here, the fix is working
+        assert len(pages) == 1
+        assert pages[0].title == "Test Page"
+    except RuntimeError as e:
+        if "generator raised StopIteration" in str(e):
+            pytest.fail(
+                "Bug reproduced: RuntimeError raised due to StopIteration in generator. "
+                "The fix has not been applied yet."
+            )
+        else:
+            # Some other RuntimeError, re-raise it
+            raise
+
+
+def test_iteration_completes_normally():
+    """
+    Verify iteration completes without RuntimeError after fix.
+
+    This test verifies that the fix properly handles stream exhaustion
+    and allows iteration to complete normally.
+    """
+    dump = Dump.from_file(io.StringIO(MINIMAL_XML))
+
+    # Should complete without raising RuntimeError
+    pages = list(dump)
+
+    # Verify the page was extracted
+    assert len(pages) == 1
+    assert pages[0].title == "Test Page"
+    assert pages[0].id == 1
+    assert pages[0].namespace == 0
+
+
+def test_multiple_pages_iteration():
+    """
+    Verify iteration works correctly with multiple pages.
+
+    Tests that the fix doesn't break normal iteration over multiple
+    pages in the XML dump.
+    """
+    dump = Dump.from_file(io.StringIO(MULTI_PAGE_XML))
+
+    # Should complete without raising RuntimeError
+    pages = list(dump)
+
+    # Verify all pages were extracted
+    assert len(pages) == 2
+    assert pages[0].title == "Page 1"
+    assert pages[0].id == 1
+    assert pages[1].title == "Page 2"
+    assert pages[1].id == 2
+
+
+def test_iteration_with_generator_pattern():
+    """
+    Verify the fix works with generator iteration pattern.
+
+    This tests that the fix works when using the dump as a generator
+    rather than converting to a list.
+    """
+    dump = Dump.from_file(io.StringIO(MULTI_PAGE_XML))
+
+    # Iterate using generator pattern
+    page_titles = []
+    for page in dump:
+        page_titles.append(page.title)
+
+    # Should have collected all pages
+    assert page_titles == ["Page 1", "Page 2"]
+
+
+def test_empty_dump_iteration():
+    """
+    Verify iteration handles empty dumps correctly.
+
+    Tests edge case where there are no pages in the dump.
+    """
+    empty_xml = """
+    <mediawiki>
+      <siteinfo>
+        <sitename>Wikipedia</sitename>
+        <dbname>enwiki</dbname>
+      </siteinfo>
+    </mediawiki>
+    """
+
+    dump = Dump.from_file(io.StringIO(empty_xml))
+
+    # Should complete without error
+    pages = list(dump)
+
+    # Should have no pages
+    assert len(pages) == 0
+
+
+def test_partial_iteration():
+    """
+    Verify partial iteration doesn't cause issues.
+
+    Tests that breaking out of iteration early doesn't cause problems
+    with the StopIteration handling.
+    """
+    dump = Dump.from_file(io.StringIO(MULTI_PAGE_XML))
+
+    # Only iterate over first page
+    first_page = None
+    for page in dump:
+        first_page = page
+        break
+
+    # Should have gotten the first page
+    assert first_page is not None
+    assert first_page.title == "Page 1"
+
+
+if __name__ == "__main__":
+    # Allow running tests directly
+    pytest.main([__file__, "-v"])