|
6 | 6 | #include "list.h"
|
7 | 7 | #include "parse.h"
|
8 | 8 |
|
9 |
| -char *json_token_str(char *js, jsmntok_t *t) |
10 |
| -{ |
11 |
| - char *str; |
12 |
| - int i, len = t->end - t->start + 1; |
| 9 | +/* parse_utf_16() and process_string() are copied from https://github.com/kgabis/parson */ |
| 10 | +#include <ctype.h> |
13 | 11 |
|
14 |
| - str = calloc(sizeof(char), len); |
15 |
| - if (str == NULL) |
16 |
| - return NULL; |
| 12 | +#define JSONFailure (-1) |
| 13 | +#define JSONSuccess (0) |
| 14 | +#define parson_malloc malloc |
| 15 | +#define parson_free free |
17 | 16 |
|
18 |
| - memcpy(str, js + t->start, len - 1); |
/* Returns 1 when the four bytes starting at s are all hexadecimal digits,
   0 otherwise. Scanning stops at the first non-hex byte, so a NUL
   terminator inside the window is never read past. */
static int is_utf16_hex(const unsigned char *s) {
    int i;

    for (i = 0; i < 4; i++) {
        if (!isxdigit(s[i]))
            return 0;
    }
    return 1;
}
19 | 20 |
|
20 |
| - for (i = 0; str[i] != '\0'; i++) { |
21 |
| - if (str[i] != '\\') |
22 |
| - continue; |
| 21 | +static int parse_utf_16(const char **unprocessed, char **processed) { |
| 22 | + unsigned int cp, lead, trail; |
| 23 | + char *processed_ptr = *processed; |
| 24 | + const char *unprocessed_ptr = *unprocessed; |
| 25 | + unprocessed_ptr++; /* skips u */ |
| 26 | + if (!is_utf16_hex((const unsigned char*)unprocessed_ptr) || sscanf(unprocessed_ptr, "%4x", &cp) == EOF) |
| 27 | + return JSONFailure; |
| 28 | + if (cp < 0x80) { |
| 29 | + *processed_ptr = cp; /* 0xxxxxxx */ |
| 30 | + } else if (cp < 0x800) { |
| 31 | + *processed_ptr++ = ((cp >> 6) & 0x1F) | 0xC0; /* 110xxxxx */ |
| 32 | + *processed_ptr = ((cp ) & 0x3F) | 0x80; /* 10xxxxxx */ |
| 33 | + } else if (cp < 0xD800 || cp > 0xDFFF) { |
| 34 | + *processed_ptr++ = ((cp >> 12) & 0x0F) | 0xE0; /* 1110xxxx */ |
| 35 | + *processed_ptr++ = ((cp >> 6) & 0x3F) | 0x80; /* 10xxxxxx */ |
| 36 | + *processed_ptr = ((cp ) & 0x3F) | 0x80; /* 10xxxxxx */ |
| 37 | + } else if (cp >= 0xD800 && cp <= 0xDBFF) { /* lead surrogate (0xD800..0xDBFF) */ |
| 38 | + lead = cp; |
| 39 | + unprocessed_ptr += 4; /* should always be within the buffer, otherwise previous sscanf would fail */ |
| 40 | + if (*unprocessed_ptr++ != '\\' || *unprocessed_ptr++ != 'u' || /* starts with \u? */ |
| 41 | + !is_utf16_hex((const unsigned char*)unprocessed_ptr) || |
| 42 | + sscanf(unprocessed_ptr, "%4x", &trail) == EOF || |
| 43 | + trail < 0xDC00 || trail > 0xDFFF) { /* valid trail surrogate? (0xDC00..0xDFFF) */ |
| 44 | + return JSONFailure; |
| 45 | + } |
| 46 | + cp = ((((lead-0xD800)&0x3FF)<<10)|((trail-0xDC00)&0x3FF))+0x010000; |
| 47 | + *processed_ptr++ = (((cp >> 18) & 0x07) | 0xF0); /* 11110xxx */ |
| 48 | + *processed_ptr++ = (((cp >> 12) & 0x3F) | 0x80); /* 10xxxxxx */ |
| 49 | + *processed_ptr++ = (((cp >> 6) & 0x3F) | 0x80); /* 10xxxxxx */ |
| 50 | + *processed_ptr = (((cp ) & 0x3F) | 0x80); /* 10xxxxxx */ |
| 51 | + } else { /* trail surrogate before lead surrogate */ |
| 52 | + return JSONFailure; |
| 53 | + } |
| 54 | + unprocessed_ptr += 3; |
| 55 | + *processed = processed_ptr; |
| 56 | + *unprocessed = unprocessed_ptr; |
| 57 | + return JSONSuccess; |
| 58 | +} |
23 | 59 |
|
24 |
| - if ((str + i + 1) == strstr(str + i, "u003e")) { |
25 |
| - str[i] = '>'; |
26 |
| - memmove(str + i + 1, str + i + 6, len - i - 6); |
27 |
| - } else if ((str + i + 1) == strstr(str + i, "u003c")) { |
28 |
| - str[i] = '<'; |
29 |
| - memmove(str + i + 1, str + i + 6, len - i - 6); |
30 |
| - } else if ((str + i + 1) == strstr(str + i, "u0026")) { |
31 |
| - str[i] = '&'; |
32 |
| - memmove(str + i + 1, str + i + 6, len - i - 6); |
33 |
| - } else { |
34 |
| - memmove(str + i, str + i + 1, len - i -1); |
35 |
| - } |
36 |
| - } |
37 | 60 |
|
38 |
| - return str; |
| 61 | +/* Copies and processes passed string up to supplied length. |
| 62 | + Example: "\u006Corem ipsum" -> lorem ipsum */ |
| 63 | +static char* process_string(const char *input, size_t len) { |
| 64 | + const char *input_ptr = input; |
| 65 | + size_t initial_size = (len + 1) * sizeof(char); |
| 66 | + //size_t final_size = 0; |
| 67 | + char *output = (char*)parson_malloc(initial_size); |
| 68 | + char *output_ptr = output; |
| 69 | + //char *resized_output = NULL; |
| 70 | + while ((*input_ptr != '\0') && (size_t)(input_ptr - input) < len) { |
| 71 | + if (*input_ptr == '\\') { |
| 72 | + input_ptr++; |
| 73 | + switch (*input_ptr) { |
| 74 | + case '\"': *output_ptr = '\"'; break; |
| 75 | + case '\\': *output_ptr = '\\'; break; |
| 76 | + case '/': *output_ptr = '/'; break; |
| 77 | + case 'b': *output_ptr = '\b'; break; |
| 78 | + case 'f': *output_ptr = '\f'; break; |
| 79 | + case 'n': *output_ptr = '\n'; break; |
| 80 | + case 'r': *output_ptr = '\r'; break; |
| 81 | + case 't': *output_ptr = '\t'; break; |
| 82 | + case 'u': |
| 83 | + if (parse_utf_16(&input_ptr, &output_ptr) == JSONFailure) |
| 84 | + goto error; |
| 85 | + break; |
| 86 | + default: |
| 87 | + goto error; |
| 88 | + } |
| 89 | + } else if ((unsigned char)*input_ptr < 0x20) { |
| 90 | + goto error; /* 0x00-0x19 are invalid characters for json string (http://www.ietf.org/rfc/rfc4627.txt) */ |
| 91 | + } else { |
| 92 | + *output_ptr = *input_ptr; |
| 93 | + } |
| 94 | + output_ptr++; |
| 95 | + input_ptr++; |
| 96 | + } |
| 97 | + *output_ptr = '\0'; |
| 98 | + /* resize to new length */ |
| 99 | + //final_size = (size_t)(output_ptr-output) + 1; |
| 100 | + //resized_output = (char*)parson_malloc(final_size); |
| 101 | + //if (resized_output == NULL) |
| 102 | + // goto error; |
| 103 | + //memcpy(resized_output, output, final_size); |
| 104 | + //parson_free(output); |
| 105 | + //return resized_output; |
| 106 | + return output; |
| 107 | +error: |
| 108 | + parson_free(output); |
| 109 | + return NULL; |
| 110 | +} |
| 111 | + |
| 112 | +char *json_token_str(char *js, jsmntok_t *t) |
| 113 | +{ |
| 114 | + return process_string(js+t->start, t->end - t->start); |
39 | 115 | }
|
40 | 116 |
|
41 | 117 | int json_token_int(char *js, jsmntok_t *t)
|
|
0 commit comments