diff --git a/kobo/apps/openrosa/apps/logger/exceptions.py b/kobo/apps/openrosa/apps/logger/exceptions.py index 62fbfaf64d..19be8d18e1 100644 --- a/kobo/apps/openrosa/apps/logger/exceptions.py +++ b/kobo/apps/openrosa/apps/logger/exceptions.py @@ -68,6 +68,10 @@ def __init__(self, message=t('The instance could not be parsed')): super().__init__(message) +class InvalidXMLCharacterError(Exception): + pass + + class LockedSubmissionError(Exception): def __init__(self, message=t('Submission is currently being processed.')): super().__init__(message) diff --git a/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py b/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py index badaf13a19..442f94b590 100644 --- a/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py +++ b/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py @@ -135,8 +135,8 @@ def test_corrupted_submission(self): self.user.username, TempFileProxy(xml), None, None, request=request ) # No `DjangoUnicodeDecodeError` errors are raised anymore. - # An `ExpatError` is raised instead - text = 'Improperly formatted XML' + # An `InvalidXMLCharacterError` is raised instead + text = 'unsupported or invisible characters' self.assertContains(error, text, status_code=400) @pytest.mark.skipif( @@ -150,3 +150,23 @@ def test_check_exceeded_limit_on_submission(self): self._submit_simple_yes() patched.assert_any_call(self.user, UsageType.SUBMISSION) patched.assert_any_call(self.user, UsageType.STORAGE_BYTES) + + def test_rejects_invalid_xml_char_in_text_submission(self): + """ + Submitting an XML that contains an invalid XML control character (U+000C) + should be rejected + """ + # XML with an invalid character + invalid_char_xml = ( + '' + '' + 'Yes\u000CNo' + 'uuid:{}' + '' + ).format(str(uuid.uuid4())) + + error, instance = safe_create_instance( + self.user.username, TempFileProxy(invalid_char_xml), None, None + ) + text = 'unsupported or invisible characters' + self.assertContains(error, text, status_code=400) diff --git a/kobo/apps/openrosa/libs/utils/logger_tools.py b/kobo/apps/openrosa/libs/utils/logger_tools.py index 27be31438a..8418e564fa 100644 --- a/kobo/apps/openrosa/libs/utils/logger_tools.py +++ b/kobo/apps/openrosa/libs/utils/logger_tools.py @@ -51,6 +51,7 @@ InstanceIdMissingError, InstanceInvalidUserError, InstanceMultipleNodeError, + InvalidXMLCharacterError, LockedSubmissionError, TemporarilyUnavailableError, ) @@ -204,6 +205,7 @@ def create_instance( username = username.lower() xml = smart_str(xml_file.read()) + validate_xml_chars(xml) xml_hash = Instance.get_hash(xml) xform = get_xform_from_submission(xml, username, uuid) check_submission_permissions(request, xform) @@ -350,6 +352,33 @@ def dict2xform(submission: dict, xform_id_string: str) -> str: return xml_head + dict2xml(submission) + xml_tail +def validate_xml_chars(xml: str) -> None: + """ + Validate an XML submission for parser errors and disallowed XML characters + + - Some clients may include a parser error wrapper when they fail to + serialise form content that contains control/invisible characters. + - If either the wrapper or disallowed characters are present, this + function raises `InvalidXMLCharacterError`. + """ + invalid_xml_char_re = re.compile( + r'[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]' + ) + + has_parser_error = ' bool: """ @@ -473,6 +502,9 @@ def status_code(self): 'The owner of this survey has exceeded their submission limit.' ) result.http_error_response = OpenRosaResponsePaymentRequired(result.error) + except InvalidXMLCharacterError as e: + result.error = str(e) + result.http_error_response = OpenRosaResponseBadRequest(result.error) except AccountInactiveError: result.error = t('Account is not active') result.http_error_response = OpenRosaResponseNotAllowed(result.error)