Skip to content
This repository has been archived by the owner on Apr 18, 2022. It is now read-only.

Commit

Permalink
Improve error reporting when serializing non-Unicode strings to JSON
Browse files Browse the repository at this point in the history
* src/coding.c (utf8_string_p): New helper function.
(syms_of_coding) <utf-8-unix>: Move from json.c.

* src/json.c (json_check_utf8): New helper function.
(lisp_to_json_toplevel_1, lisp_to_json): Use it.  To save a bit of
time, check for invalid UTF-8 strings only after encountering an
error, since Jansson already rejects them.

* test/src/json-tests.el (json-serialize/invalid-unicode): Adapt
expected error symbol.
  • Loading branch information
phst committed Dec 30, 2017
1 parent 30ffc25 commit a5835df
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 15 deletions.
22 changes: 22 additions & 0 deletions src/coding.c
Original file line number Diff line number Diff line change
Expand Up @@ -6360,6 +6360,27 @@ check_utf_8 (struct coding_system *coding)
}


/* Report whether the bytes of STRING form a valid UTF-8 sequence.
   STRING must be a unibyte string; this precondition is checked
   with eassert.  */

bool
utf8_string_p (Lisp_Object string)
{
  eassert (!STRING_MULTIBYTE (string));

  struct coding_system utf8;
  setup_coding_system (Qutf_8_unix, &utf8);

  /* Only the members that check_utf_8 accesses are filled in
     below.  */

  /* Source text: the whole of STRING, starting at position 0.  */
  utf8.src_object = string;
  utf8.src_chars = SCHARS (string);
  utf8.src_bytes = SBYTES (string);
  utf8.src_pos = 0;
  utf8.src_pos_byte = 0;

  /* Remaining state consulted by check_utf_8.  */
  utf8.head_ascii = -1;
  utf8.eol_seen = EOL_SEEN_NONE;

  /* A result of -1 from check_utf_8 means STRING is not valid
     UTF-8.  */
  bool valid = check_utf_8 (&utf8) != -1;
  return valid;
}


/* Detect how end-of-line of a text of length SRC_BYTES pointed by
SOURCE is encoded. If CATEGORY is one of
coding_category_utf_16_XXXX, assume that CR and LF are encoded by
Expand Down Expand Up @@ -10846,6 +10867,7 @@ syms_of_coding (void)
DEFSYM (Qiso_2022, "iso-2022");

DEFSYM (Qutf_8, "utf-8");
DEFSYM (Qutf_8_unix, "utf-8-unix");
DEFSYM (Qutf_8_emacs, "utf-8-emacs");

#if defined (WINDOWSNT) || defined (CYGWIN)
Expand Down
1 change: 1 addition & 0 deletions src/coding.h
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,7 @@ struct coding_system
/* Extern declarations. */
extern Lisp_Object code_conversion_save (bool, bool);
extern bool encode_coding_utf_8 (struct coding_system *);
extern bool utf8_string_p (Lisp_Object);
extern void setup_coding_system (Lisp_Object, struct coding_system *);
extern Lisp_Object coding_charset_list (struct coding_system *);
extern Lisp_Object coding_system_charset_list (Lisp_Object);
Expand Down
33 changes: 25 additions & 8 deletions src/json.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,15 @@ json_check (json_t *object)
return object;
}

/* Signal an error of type `wrong-type-argument', with
   `utf-8-string-p' as the failed predicate and STRING as the
   offending value, if STRING is not a valid UTF-8 string as judged
   by utf8_string_p.  Otherwise do nothing.  STRING must be a unibyte
   string.

   The callers in this file invoke this only after a Jansson call has
   failed, to tell an invalid string apart from a low-memory
   condition.  */

static void
json_check_utf8 (Lisp_Object string)
{
CHECK_TYPE (utf8_string_p (string), Qutf_8_string_p, string);
}

static json_t *lisp_to_json (Lisp_Object);

/* Convert a Lisp object to a toplevel JSON object (array or object).
Expand Down Expand Up @@ -363,9 +372,12 @@ lisp_to_json_toplevel_1 (Lisp_Object lisp, json_t **json)
int status = json_object_set_new (*json, key_str,
lisp_to_json (HASH_VALUE (h, i)));
if (status == -1)
/* FIXME: A failure here might also indicate that the
key is not a valid Unicode string. */
json_out_of_memory ();
{
/* A failure can be caused either by an invalid key or
by low memory. */
json_check_utf8 (key);
json_out_of_memory ();
}
}
clear_unwind_protect (count);
return unbind_to (count, Qnil);
Expand Down Expand Up @@ -447,9 +459,15 @@ lisp_to_json (Lisp_Object lisp)
else if (STRINGP (lisp))
{
Lisp_Object encoded = json_encode (lisp);
/* FIXME: We might throw an out-of-memory error here if the
string is not valid Unicode. */
return json_check (json_stringn (SSDATA (encoded), SBYTES (encoded)));
json_t *json = json_stringn (SSDATA (encoded), SBYTES (encoded));
if (json == NULL)
{
/* A failure can be caused either by an invalid string or by
low memory. */
json_check_utf8 (encoded);
json_out_of_memory ();
}
return json;
}

/* LISP now must be a vector, hashtable, or alist. */
Expand Down Expand Up @@ -863,8 +881,7 @@ syms_of_json (void)

DEFSYM (Qstring_without_embedded_nulls_p, "string-without-embedded-nulls-p");
DEFSYM (Qjson_value_p, "json-value-p");

DEFSYM (Qutf_8_unix, "utf-8-unix");
DEFSYM (Qutf_8_string_p, "utf-8-string-p");

DEFSYM (Qjson_error, "json-error");
DEFSYM (Qjson_out_of_memory, "json-out-of-memory");
Expand Down
12 changes: 5 additions & 7 deletions test/src/json-tests.el
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,11 @@

(ert-deftest json-serialize/invalid-unicode ()
(skip-unless (fboundp 'json-serialize))
;; FIXME: "out of memory" is the wrong error signal, but we don't
;; currently distinguish between error types when serializing.
(should-error (json-serialize ["a\uDBBBb"]) :type 'json-out-of-memory)
(should-error (json-serialize ["u\x110000v"]) :type 'json-out-of-memory)
(should-error (json-serialize ["u\x3FFFFFv"]) :type 'json-out-of-memory)
(should-error (json-serialize ["u\xCCv"]) :type 'json-out-of-memory)
(should-error (json-serialize ["u\u00C4\xCCv"]) :type 'json-out-of-memory))
(should-error (json-serialize ["a\uDBBBb"]) :type 'wrong-type-argument)
(should-error (json-serialize ["u\x110000v"]) :type 'wrong-type-argument)
(should-error (json-serialize ["u\x3FFFFFv"]) :type 'wrong-type-argument)
(should-error (json-serialize ["u\xCCv"]) :type 'wrong-type-argument)
(should-error (json-serialize ["u\u00C4\xCCv"]) :type 'wrong-type-argument))

(ert-deftest json-parse-string/null ()
(skip-unless (fboundp 'json-parse-string))
Expand Down

0 comments on commit a5835df

Please sign in to comment.