@@ -996,13 +996,12 @@ static inline VALUE vstate_get(struct generate_json_data *data)
996996 return data -> vstate ;
997997}
998998
999- struct hash_foreach_arg {
1000- VALUE hash ;
1001- struct generate_json_data * data ;
1002- int first_key_type ;
1003- bool first ;
1004- bool mixed_keys_encountered ;
1005- };
999+ static VALUE
1000+ json_call_as_json (JSON_Generator_State * state , VALUE object , VALUE is_key )
1001+ {
1002+ VALUE proc_args [2 ] = {object , is_key };
1003+ return rb_proc_call_with_block (state -> as_json , 2 , proc_args , Qnil );
1004+ }
10061005
10071006static VALUE
10081007convert_string_subclass (VALUE key )
@@ -1019,6 +1018,130 @@ convert_string_subclass(VALUE key)
10191018 return key_to_s ;
10201019}
10211020
1021+
1022+ static bool enc_utf8_compatible_p (int enc_idx )
1023+ {
1024+ if (enc_idx == usascii_encindex ) return true;
1025+ if (enc_idx == utf8_encindex ) return true;
1026+ return false;
1027+ }
1028+
1029+ static VALUE encode_json_string_try (VALUE str )
1030+ {
1031+ return rb_funcall (str , i_encode , 1 , Encoding_UTF_8 );
1032+ }
1033+
1034+ static VALUE encode_json_string_rescue (VALUE str , VALUE exception )
1035+ {
1036+ raise_generator_error_str (str , rb_funcall (exception , rb_intern ("message" ), 0 ));
1037+ return Qundef ;
1038+ }
1039+
1040+ static inline bool valid_json_string_p (VALUE str )
1041+ {
1042+ int coderange = rb_enc_str_coderange (str );
1043+
1044+ if (RB_LIKELY (coderange == ENC_CODERANGE_7BIT )) {
1045+ return true;
1046+ }
1047+
1048+ if (RB_LIKELY (coderange == ENC_CODERANGE_VALID )) {
1049+ return enc_utf8_compatible_p (RB_ENCODING_GET_INLINED (str ));
1050+ }
1051+
1052+ return false;
1053+ }
1054+
1055+ static inline VALUE ensure_valid_encoding (struct generate_json_data * data , VALUE str , bool as_json_called , bool is_key )
1056+ {
1057+ if (RB_LIKELY (valid_json_string_p (str ))) {
1058+ return str ;
1059+ }
1060+
1061+ if (!as_json_called && data -> state -> strict && RTEST (data -> state -> as_json )) {
1062+ VALUE coerced_str = json_call_as_json (data -> state , str , Qfalse );
1063+ if (coerced_str != str ) {
1064+ if (RB_TYPE_P (coerced_str , T_STRING )) {
1065+ if (!valid_json_string_p (coerced_str )) {
1066+ raise_generator_error (str , "source sequence is illegal/malformed utf-8" );
1067+ }
1068+ } else {
1069+ // as_json could return another type than T_STRING
1070+ if (is_key ) {
1071+ raise_generator_error (coerced_str , "%" PRIsVALUE " not allowed as object key in JSON" , CLASS_OF (coerced_str ));
1072+ }
1073+ }
1074+
1075+ return coerced_str ;
1076+ }
1077+ }
1078+
1079+ if (RB_ENCODING_GET_INLINED (str ) == binary_encindex ) {
1080+ VALUE utf8_string = rb_enc_associate_index (rb_str_dup (str ), utf8_encindex );
1081+ switch (rb_enc_str_coderange (utf8_string )) {
1082+ case ENC_CODERANGE_7BIT :
1083+ return utf8_string ;
1084+ case ENC_CODERANGE_VALID :
1085+ // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1086+ // TODO: Raise in 3.0.0
1087+ rb_warn ("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0" );
1088+ return utf8_string ;
1089+ break ;
1090+ }
1091+ }
1092+
1093+ return rb_rescue (encode_json_string_try , str , encode_json_string_rescue , str );
1094+ }
1095+
1096+ static void raw_generate_json_string (FBuffer * buffer , struct generate_json_data * data , VALUE obj )
1097+ {
1098+ fbuffer_append_char (buffer , '"' );
1099+
1100+ long len ;
1101+ search_state search ;
1102+ search .buffer = buffer ;
1103+ RSTRING_GETMEM (obj , search .ptr , len );
1104+ search .cursor = search .ptr ;
1105+ search .end = search .ptr + len ;
1106+
1107+ #ifdef HAVE_SIMD
1108+ search .matches_mask = 0 ;
1109+ search .has_matches = false;
1110+ search .chunk_base = NULL ;
1111+ #endif /* HAVE_SIMD */
1112+
1113+ switch (rb_enc_str_coderange (obj )) {
1114+ case ENC_CODERANGE_7BIT :
1115+ case ENC_CODERANGE_VALID :
1116+ if (RB_UNLIKELY (data -> state -> ascii_only )) {
1117+ convert_UTF8_to_ASCII_only_JSON (& search , data -> state -> script_safe ? script_safe_escape_table : ascii_only_escape_table );
1118+ } else if (RB_UNLIKELY (data -> state -> script_safe )) {
1119+ convert_UTF8_to_script_safe_JSON (& search );
1120+ } else {
1121+ convert_UTF8_to_JSON (& search );
1122+ }
1123+ break ;
1124+ default :
1125+ raise_generator_error (obj , "source sequence is illegal/malformed utf-8" );
1126+ break ;
1127+ }
1128+ fbuffer_append_char (buffer , '"' );
1129+ }
1130+
1131+ static void generate_json_string (FBuffer * buffer , struct generate_json_data * data , VALUE obj )
1132+ {
1133+ obj = ensure_valid_encoding (data , obj , false, false);
1134+ raw_generate_json_string (buffer , data , obj );
1135+ }
1136+
1137+ struct hash_foreach_arg {
1138+ VALUE hash ;
1139+ struct generate_json_data * data ;
1140+ int first_key_type ;
1141+ bool first ;
1142+ bool mixed_keys_encountered ;
1143+ };
1144+
10221145NOINLINE ()
10231146static void
10241147json_inspect_hash_with_mixed_keys (struct hash_foreach_arg * arg )
@@ -1035,13 +1158,6 @@ json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
10351158 }
10361159}
10371160
1038- static VALUE
1039- json_call_as_json (JSON_Generator_State * state , VALUE object , VALUE is_key )
1040- {
1041- VALUE proc_args [2 ] = {object , is_key };
1042- return rb_proc_call_with_block (state -> as_json , 2 , proc_args , Qnil );
1043- }
1044-
10451161static int
10461162json_object_i (VALUE key , VALUE val , VALUE _arg )
10471163{
@@ -1107,8 +1223,10 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
11071223 break ;
11081224 }
11091225
1226+ key_to_s = ensure_valid_encoding (data , key_to_s , as_json_called , true);
1227+
11101228 if (RB_LIKELY (RBASIC_CLASS (key_to_s ) == rb_cString )) {
1111- generate_json_string (buffer , data , key_to_s );
1229+ raw_generate_json_string (buffer , data , key_to_s );
11121230 } else {
11131231 generate_json (buffer , data , key_to_s );
11141232 }
@@ -1191,85 +1309,6 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
11911309 fbuffer_append_char (buffer , ']' );
11921310}
11931311
1194- static inline int enc_utf8_compatible_p (int enc_idx )
1195- {
1196- if (enc_idx == usascii_encindex ) return 1 ;
1197- if (enc_idx == utf8_encindex ) return 1 ;
1198- return 0 ;
1199- }
1200-
1201- static VALUE encode_json_string_try (VALUE str )
1202- {
1203- return rb_funcall (str , i_encode , 1 , Encoding_UTF_8 );
1204- }
1205-
1206- static VALUE encode_json_string_rescue (VALUE str , VALUE exception )
1207- {
1208- raise_generator_error_str (str , rb_funcall (exception , rb_intern ("message" ), 0 ));
1209- return Qundef ;
1210- }
1211-
1212- static inline VALUE ensure_valid_encoding (VALUE str )
1213- {
1214- int encindex = RB_ENCODING_GET (str );
1215- VALUE utf8_string ;
1216- if (RB_UNLIKELY (!enc_utf8_compatible_p (encindex ))) {
1217- if (encindex == binary_encindex ) {
1218- utf8_string = rb_enc_associate_index (rb_str_dup (str ), utf8_encindex );
1219- switch (rb_enc_str_coderange (utf8_string )) {
1220- case ENC_CODERANGE_7BIT :
1221- return utf8_string ;
1222- case ENC_CODERANGE_VALID :
1223- // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1224- // TODO: Raise in 3.0.0
1225- rb_warn ("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0" );
1226- return utf8_string ;
1227- break ;
1228- }
1229- }
1230-
1231- str = rb_rescue (encode_json_string_try , str , encode_json_string_rescue , str );
1232- }
1233- return str ;
1234- }
1235-
1236- static void generate_json_string (FBuffer * buffer , struct generate_json_data * data , VALUE obj )
1237- {
1238- obj = ensure_valid_encoding (obj );
1239-
1240- fbuffer_append_char (buffer , '"' );
1241-
1242- long len ;
1243- search_state search ;
1244- search .buffer = buffer ;
1245- RSTRING_GETMEM (obj , search .ptr , len );
1246- search .cursor = search .ptr ;
1247- search .end = search .ptr + len ;
1248-
1249- #ifdef HAVE_SIMD
1250- search .matches_mask = 0 ;
1251- search .has_matches = false;
1252- search .chunk_base = NULL ;
1253- #endif /* HAVE_SIMD */
1254-
1255- switch (rb_enc_str_coderange (obj )) {
1256- case ENC_CODERANGE_7BIT :
1257- case ENC_CODERANGE_VALID :
1258- if (RB_UNLIKELY (data -> state -> ascii_only )) {
1259- convert_UTF8_to_ASCII_only_JSON (& search , data -> state -> script_safe ? script_safe_escape_table : ascii_only_escape_table );
1260- } else if (RB_UNLIKELY (data -> state -> script_safe )) {
1261- convert_UTF8_to_script_safe_JSON (& search );
1262- } else {
1263- convert_UTF8_to_JSON (& search );
1264- }
1265- break ;
1266- default :
1267- raise_generator_error (obj , "source sequence is illegal/malformed utf-8" );
1268- break ;
1269- }
1270- fbuffer_append_char (buffer , '"' );
1271- }
1272-
12731312static void generate_json_fallback (FBuffer * buffer , struct generate_json_data * data , VALUE obj )
12741313{
12751314 VALUE tmp ;
@@ -1406,10 +1445,20 @@ static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALU
14061445 if (klass != rb_cArray ) goto general ;
14071446 generate_json_array (buffer , data , obj );
14081447 break ;
1409- case T_STRING :
1448+ case T_STRING : {
14101449 if (klass != rb_cString ) goto general ;
1411- generate_json_string (buffer , data , obj );
1450+
1451+ if (RB_LIKELY (valid_json_string_p (obj ))) {
1452+ raw_generate_json_string (buffer , data , obj );
1453+ } else if (as_json_called ) {
1454+ raise_generator_error (obj , "source sequence is illegal/malformed utf-8" );
1455+ } else {
1456+ obj = ensure_valid_encoding (data , obj , false, false);
1457+ as_json_called = true;
1458+ goto start ;
1459+ }
14121460 break ;
1461+ }
14131462 case T_SYMBOL :
14141463 generate_json_symbol (buffer , data , obj );
14151464 break ;
0 commit comments