Skip to content
This repository was archived by the owner on Feb 8, 2021. It is now read-only.

Commit e6cffd9

Browse files
committed
Merge pull request #39 from laijs/escape
handle all escaped characters correctly
2 parents c03c057 + 213a00e commit e6cffd9

File tree

1 file changed

+101
-25
lines changed

1 file changed

+101
-25
lines changed

src/parse.c

Lines changed: 101 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,36 +6,112 @@
66
#include "list.h"
77
#include "parse.h"
88

9-
char *json_token_str(char *js, jsmntok_t *t)
10-
{
11-
char *str;
12-
int i, len = t->end - t->start + 1;
9+
/* parse_utf_16() and process_string() are copied from https://github.com/kgabis/parson */
10+
#include <ctype.h>
1311

14-
str = calloc(sizeof(char), len);
15-
if (str == NULL)
16-
return NULL;
12+
#define JSONFailure (-1)
13+
#define JSONSuccess (0)
14+
#define parson_malloc malloc
15+
#define parson_free free
1716

18-
memcpy(str, js + t->start, len - 1);
17+
/*
 * Returns 1 when the four bytes starting at s are all hexadecimal digits,
 * 0 otherwise. Scanning stops at the first non-hex byte, so a NUL
 * terminator inside the first four bytes is never read past.
 */
static int is_utf16_hex(const unsigned char *s) {
    int i;

    for (i = 0; i < 4; i++) {
        if (!isxdigit(s[i]))
            return 0;
    }
    return 1;
}
1920

20-
for (i = 0; str[i] != '\0'; i++) {
21-
if (str[i] != '\\')
22-
continue;
21+
static int parse_utf_16(const char **unprocessed, char **processed) {
22+
unsigned int cp, lead, trail;
23+
char *processed_ptr = *processed;
24+
const char *unprocessed_ptr = *unprocessed;
25+
unprocessed_ptr++; /* skips u */
26+
if (!is_utf16_hex((const unsigned char*)unprocessed_ptr) || sscanf(unprocessed_ptr, "%4x", &cp) == EOF)
27+
return JSONFailure;
28+
if (cp < 0x80) {
29+
*processed_ptr = cp; /* 0xxxxxxx */
30+
} else if (cp < 0x800) {
31+
*processed_ptr++ = ((cp >> 6) & 0x1F) | 0xC0; /* 110xxxxx */
32+
*processed_ptr = ((cp ) & 0x3F) | 0x80; /* 10xxxxxx */
33+
} else if (cp < 0xD800 || cp > 0xDFFF) {
34+
*processed_ptr++ = ((cp >> 12) & 0x0F) | 0xE0; /* 1110xxxx */
35+
*processed_ptr++ = ((cp >> 6) & 0x3F) | 0x80; /* 10xxxxxx */
36+
*processed_ptr = ((cp ) & 0x3F) | 0x80; /* 10xxxxxx */
37+
} else if (cp >= 0xD800 && cp <= 0xDBFF) { /* lead surrogate (0xD800..0xDBFF) */
38+
lead = cp;
39+
unprocessed_ptr += 4; /* should always be within the buffer, otherwise previous sscanf would fail */
40+
if (*unprocessed_ptr++ != '\\' || *unprocessed_ptr++ != 'u' || /* starts with \u? */
41+
!is_utf16_hex((const unsigned char*)unprocessed_ptr) ||
42+
sscanf(unprocessed_ptr, "%4x", &trail) == EOF ||
43+
trail < 0xDC00 || trail > 0xDFFF) { /* valid trail surrogate? (0xDC00..0xDFFF) */
44+
return JSONFailure;
45+
}
46+
cp = ((((lead-0xD800)&0x3FF)<<10)|((trail-0xDC00)&0x3FF))+0x010000;
47+
*processed_ptr++ = (((cp >> 18) & 0x07) | 0xF0); /* 11110xxx */
48+
*processed_ptr++ = (((cp >> 12) & 0x3F) | 0x80); /* 10xxxxxx */
49+
*processed_ptr++ = (((cp >> 6) & 0x3F) | 0x80); /* 10xxxxxx */
50+
*processed_ptr = (((cp ) & 0x3F) | 0x80); /* 10xxxxxx */
51+
} else { /* trail surrogate before lead surrogate */
52+
return JSONFailure;
53+
}
54+
unprocessed_ptr += 3;
55+
*processed = processed_ptr;
56+
*unprocessed = unprocessed_ptr;
57+
return JSONSuccess;
58+
}
2359

24-
if ((str + i + 1) == strstr(str + i, "u003e")) {
25-
str[i] = '>';
26-
memmove(str + i + 1, str + i + 6, len - i - 6);
27-
} else if ((str + i + 1) == strstr(str + i, "u003c")) {
28-
str[i] = '<';
29-
memmove(str + i + 1, str + i + 6, len - i - 6);
30-
} else if ((str + i + 1) == strstr(str + i, "u0026")) {
31-
str[i] = '&';
32-
memmove(str + i + 1, str + i + 6, len - i - 6);
33-
} else {
34-
memmove(str + i, str + i + 1, len - i -1);
35-
}
36-
}
3760

38-
return str;
61+
/* Copies and processes passed string up to supplied length.
62+
Example: "\u006Corem ipsum" -> lorem ipsum */
63+
static char* process_string(const char *input, size_t len) {
64+
const char *input_ptr = input;
65+
size_t initial_size = (len + 1) * sizeof(char);
66+
//size_t final_size = 0;
67+
char *output = (char*)parson_malloc(initial_size);
68+
char *output_ptr = output;
69+
//char *resized_output = NULL;
70+
while ((*input_ptr != '\0') && (size_t)(input_ptr - input) < len) {
71+
if (*input_ptr == '\\') {
72+
input_ptr++;
73+
switch (*input_ptr) {
74+
case '\"': *output_ptr = '\"'; break;
75+
case '\\': *output_ptr = '\\'; break;
76+
case '/': *output_ptr = '/'; break;
77+
case 'b': *output_ptr = '\b'; break;
78+
case 'f': *output_ptr = '\f'; break;
79+
case 'n': *output_ptr = '\n'; break;
80+
case 'r': *output_ptr = '\r'; break;
81+
case 't': *output_ptr = '\t'; break;
82+
case 'u':
83+
if (parse_utf_16(&input_ptr, &output_ptr) == JSONFailure)
84+
goto error;
85+
break;
86+
default:
87+
goto error;
88+
}
89+
} else if ((unsigned char)*input_ptr < 0x20) {
90+
goto error; /* 0x00-0x19 are invalid characters for json string (http://www.ietf.org/rfc/rfc4627.txt) */
91+
} else {
92+
*output_ptr = *input_ptr;
93+
}
94+
output_ptr++;
95+
input_ptr++;
96+
}
97+
*output_ptr = '\0';
98+
/* resize to new length */
99+
//final_size = (size_t)(output_ptr-output) + 1;
100+
//resized_output = (char*)parson_malloc(final_size);
101+
//if (resized_output == NULL)
102+
// goto error;
103+
//memcpy(resized_output, output, final_size);
104+
//parson_free(output);
105+
//return resized_output;
106+
return output;
107+
error:
108+
parson_free(output);
109+
return NULL;
110+
}
111+
112+
/*
 * Returns a newly allocated, NUL-terminated copy of the text of token t
 * inside js, with JSON escape sequences decoded by process_string().
 *
 * The caller owns the result and must free() it. Returns NULL when js or t
 * is NULL, when the token offsets are not a valid range, or when
 * process_string() fails (bad escape, control character, or out of memory).
 */
char *json_token_str(char *js, jsmntok_t *t)
{
    if (js == NULL || t == NULL)
        return NULL;

    /* A negative start or end < start would turn into a huge size_t length.
       NOTE(review): jsmn appears to mark unfilled tokens with -1 offsets;
       confirm against the bundled jsmn version. */
    if (t->start < 0 || t->end < t->start)
        return NULL;

    return process_string(js + t->start, (size_t)(t->end - t->start));
}
40116

41117
int json_token_int(char *js, jsmntok_t *t)

0 commit comments

Comments
 (0)