diff --git a/kobo/apps/openrosa/apps/logger/exceptions.py b/kobo/apps/openrosa/apps/logger/exceptions.py
index 62fbfaf64d..19be8d18e1 100644
--- a/kobo/apps/openrosa/apps/logger/exceptions.py
+++ b/kobo/apps/openrosa/apps/logger/exceptions.py
@@ -68,6 +68,10 @@ def __init__(self, message=t('The instance could not be parsed')):
super().__init__(message)
+class InvalidXMLCharacterError(Exception):
+ pass
+
+
class LockedSubmissionError(Exception):
def __init__(self, message=t('Submission is currently being processed.')):
super().__init__(message)
diff --git a/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py b/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py
index badaf13a19..442f94b590 100644
--- a/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py
+++ b/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py
@@ -135,8 +135,8 @@ def test_corrupted_submission(self):
self.user.username, TempFileProxy(xml), None, None, request=request
)
# No `DjangoUnicodeDecodeError` errors are raised anymore.
- # An `ExpatError` is raised instead
- text = 'Improperly formatted XML'
+ # An `InvalidXMLCharacterError` is raised instead
+ text = 'unsupported or invisible characters'
self.assertContains(error, text, status_code=400)
@pytest.mark.skipif(
@@ -150,3 +150,23 @@ def test_check_exceeded_limit_on_submission(self):
self._submit_simple_yes()
patched.assert_any_call(self.user, UsageType.SUBMISSION)
patched.assert_any_call(self.user, UsageType.STORAGE_BYTES)
+
+ def test_rejects_invalid_xml_char_in_text_submission(self):
+ """
+ Submitting an XML that contains an invalid XML control character (U+000C)
+ should be rejected
+ """
+ # XML with an invalid character
+ invalid_char_xml = (
+ ''
+ ''
+ 'Yes\u000CNo'
+ 'uuid:{}'
+ ''
+ ).format(str(uuid.uuid4()))
+
+ error, instance = safe_create_instance(
+ self.user.username, TempFileProxy(invalid_char_xml), None, None
+ )
+ text = 'unsupported or invisible characters'
+ self.assertContains(error, text, status_code=400)
diff --git a/kobo/apps/openrosa/libs/utils/logger_tools.py b/kobo/apps/openrosa/libs/utils/logger_tools.py
index 27be31438a..8418e564fa 100644
--- a/kobo/apps/openrosa/libs/utils/logger_tools.py
+++ b/kobo/apps/openrosa/libs/utils/logger_tools.py
@@ -51,6 +51,7 @@
InstanceIdMissingError,
InstanceInvalidUserError,
InstanceMultipleNodeError,
+ InvalidXMLCharacterError,
LockedSubmissionError,
TemporarilyUnavailableError,
)
@@ -204,6 +205,7 @@ def create_instance(
username = username.lower()
xml = smart_str(xml_file.read())
+ validate_xml_chars(xml)
xml_hash = Instance.get_hash(xml)
xform = get_xform_from_submission(xml, username, uuid)
check_submission_permissions(request, xform)
@@ -350,6 +352,33 @@ def dict2xform(submission: dict, xform_id_string: str) -> str:
return xml_head + dict2xml(submission) + xml_tail
+def validate_xml_chars(xml: str) -> None:
+ """
+ Validate an XML submission for parser errors and disallowed XML characters
+
+ - Some clients may include a parser error wrapper when they fail to
+ serialise form content that contains control/invisible characters.
+ - If either the wrapper or disallowed characters are present, this
+ function raises `InvalidXMLCharacterError`.
+ """
+ invalid_xml_char_re = re.compile(
+ r'[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]'
+ )
+
+ has_parser_error = ' bool:
"""
@@ -473,6 +502,9 @@ def status_code(self):
'The owner of this survey has exceeded their submission limit.'
)
result.http_error_response = OpenRosaResponsePaymentRequired(result.error)
+ except InvalidXMLCharacterError as e:
+ result.error = str(e)
+ result.http_error_response = OpenRosaResponseBadRequest(result.error)
except AccountInactiveError:
result.error = t('Account is not active')
result.http_error_response = OpenRosaResponseNotAllowed(result.error)