Handle UnicodeDecodeError in get_response and add a regression test.

get_response() gains an except clause for UnicodeDecodeError that sets
error_context to "Unicode Decode Error" and exception_text to the string
form of the error. The new tests/test_robustness.py patches
concurrent.futures.Future.result so that it raises a UnicodeDecodeError,
runs sherlock() against a single example site, and asserts that the site's
status is QueryStatus.UNKNOWN with "Unicode Decode Error" in its context.

diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py
index f78d4b8ca..4e06c818a 100644
--- a/sherlock_project/sherlock.py
+++ b/sherlock_project/sherlock.py
@@ -121,6 +121,9 @@ def get_response(request_future, error_type, social_network):
         if response.status_code:
             # Status code exists in response object
             error_context = None
+    except UnicodeDecodeError as err_unicode:
+        error_context = "Unicode Decode Error"
+        exception_text = str(err_unicode)
     except requests.exceptions.HTTPError as errh:
         error_context = "HTTP Error"
         exception_text = str(errh)
diff --git a/tests/test_robustness.py b/tests/test_robustness.py
new file mode 100644
index 000000000..09a9b3d42
--- /dev/null
+++ b/tests/test_robustness.py
@@ -0,0 +1,33 @@
+from unittest.mock import patch
+from sherlock_project.sherlock import sherlock
+from sherlock_project.notify import QueryNotify
+from sherlock_project.result import QueryStatus, QueryResult
+
+
+def test_sherlock_handles_unicode_decode_error_gracefully():
+    """Test that Sherlock does not crash when a UnicodeDecodeError occurs."""
+    error_to_raise: UnicodeDecodeError = UnicodeDecodeError('utf-8', b'\xe9', 0, 1, 'invalid continuation byte')
+
+    with patch('concurrent.futures.Future.result', side_effect=error_to_raise):
+        username: str = "tést-usér"
+        site_data: dict[str, dict[str, str]] = {
+            "ExampleSite": {
+                "url": "https://www.example.com/{}",
+                "errorType": "status_code"
+            }
+        }
+        query_notify: QueryNotify = QueryNotify()
+
+        results: dict[str, dict[str, str | QueryResult]] = sherlock(
+            username=username,
+            site_data=site_data,
+            query_notify=query_notify
+        )
+
+    site_result: dict[str, str | QueryResult] = results["ExampleSite"]
+    assert site_result is not None, "Results dictionary should contain the site"
+
+    status_object = site_result["status"]
+
+    assert status_object.status == QueryStatus.UNKNOWN, "The site status should be UNKNOWN"
+    assert "Unicode Decode Error" in str(status_object.context), "The context should mention the specific error"