Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions kobo/apps/openrosa/apps/logger/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ def __init__(self, message=t('The instance could not be parsed')):
super().__init__(message)


class InvalidXMLCharacterError(Exception):
    """
    Signals that a submission's XML holds a parser-error marker or
    characters that are not allowed in XML.
    """


class LockedSubmissionError(Exception):
    """
    Exception whose default message states that the submission is
    currently being processed.
    """

    def __init__(self, message=t('Submission is currently being processed.')):
        # Callers may pass their own message; otherwise the default is used
        super().__init__(message)
Expand Down
24 changes: 22 additions & 2 deletions kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ def test_corrupted_submission(self):
self.user.username, TempFileProxy(xml), None, None, request=request
)
# No `DjangoUnicodeDecodeError` errors are raised anymore.
# An `ExpatError` is raised instead
text = 'Improperly formatted XML'
# An `InvalidXMLCharacterError` is raised instead
text = 'unsupported or invisible characters'
self.assertContains(error, text, status_code=400)

@pytest.mark.skipif(
Expand All @@ -150,3 +150,23 @@ def test_check_exceeded_limit_on_submission(self):
self._submit_simple_yes()
patched.assert_any_call(self.user, UsageType.SUBMISSION)
patched.assert_any_call(self.user, UsageType.STORAGE_BYTES)

def test_rejects_invalid_xml_char_in_text_submission(self):
    """
    A submission whose text value holds a form feed (U+000C), a control
    character that is invalid in XML, must be answered with a 400 error
    """
    instance_uuid = uuid.uuid4()
    # The form feed sits in the middle of the <yesno> value
    payload = (
        '<?xml version="1.0" ?>'
        '<yes_or_no id="yes_or_no">'
        '<yesno>Yes\u000CNo</yesno>'
        f'<meta><instanceID>uuid:{instance_uuid}</instanceID></meta>'
        '</yes_or_no>'
    )

    error, _instance = safe_create_instance(
        self.user.username, TempFileProxy(payload), None, None
    )
    self.assertContains(
        error, 'unsupported or invisible characters', status_code=400
    )
32 changes: 32 additions & 0 deletions kobo/apps/openrosa/libs/utils/logger_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
InstanceIdMissingError,
InstanceInvalidUserError,
InstanceMultipleNodeError,
InvalidXMLCharacterError,
LockedSubmissionError,
TemporarilyUnavailableError,
)
Expand Down Expand Up @@ -204,6 +205,7 @@ def create_instance(
username = username.lower()

xml = smart_str(xml_file.read())
validate_xml_chars(xml)
xml_hash = Instance.get_hash(xml)
xform = get_xform_from_submission(xml, username, uuid)
check_submission_permissions(request, xform)
Expand Down Expand Up @@ -350,6 +352,33 @@ def dict2xform(submission: dict, xform_id_string: str) -> str:
return xml_head + dict2xml(submission) + xml_tail


# Matches any code point outside the XML 1.0 `Char` production
# (#x9 | #xA | #xD | #x20-#xD7FF | #xE000-#xFFFD | #x10000-#x10FFFF).
# Built once at import time so submissions do not go through `re.compile`
# on every call.
_INVALID_XML_CHAR_RE = re.compile(
    r'[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]'
)

# Substrings treated as evidence that a parser error ended up in the payload
_XML_PARSER_ERROR_MARKERS = ('<parsererror', 'PCDATA invalid Char value')


def validate_xml_chars(xml: str) -> None:
    """
    Validate an XML submission for parser errors and disallowed XML characters

    - Some clients may include a parser error wrapper when they fail to
      serialise form content that contains control/invisible characters.
    - If either the wrapper or disallowed characters are present, this
      function raises `InvalidXMLCharacterError`.

    Args:
        xml: the whole submission as text.

    Returns:
        None when neither a parser error marker nor a disallowed character
        is found.

    Raises:
        InvalidXMLCharacterError: when `xml` contains one of the parser
            error markers or at least one character outside the ranges
            XML 1.0 allows.
    """
    has_parser_error = any(
        marker in xml for marker in _XML_PARSER_ERROR_MARKERS
    )

    if has_parser_error or _INVALID_XML_CHAR_RE.search(xml):
        raise InvalidXMLCharacterError(
            t(
                'Submission rejected: '
                'the form contains unsupported or invisible characters.'
            )
        )


@contextlib.contextmanager
def get_instance_lock(submission_uuid: str, xform_id: int) -> bool:
"""
Expand Down Expand Up @@ -473,6 +502,9 @@ def status_code(self):
'The owner of this survey has exceeded their submission limit.'
)
result.http_error_response = OpenRosaResponsePaymentRequired(result.error)
except InvalidXMLCharacterError as e:
result.error = str(e)
result.http_error_response = OpenRosaResponseBadRequest(result.error)
except AccountInactiveError:
result.error = t('Account is not active')
result.http_error_response = OpenRosaResponseNotAllowed(result.error)
Expand Down