@@ -996,13 +996,12 @@ static inline VALUE vstate_get(struct generate_json_data *data)
996996 return data -> vstate ;
997997}
998998
999- struct hash_foreach_arg {
1000- VALUE hash ;
1001- struct generate_json_data * data ;
1002- int first_key_type ;
1003- bool first ;
1004- bool mixed_keys_encountered ;
1005- };
999+ static VALUE
1000+ json_call_as_json (JSON_Generator_State * state , VALUE object , VALUE is_key )
1001+ {
1002+ VALUE proc_args [2 ] = {object , is_key };
1003+ return rb_proc_call_with_block (state -> as_json , 2 , proc_args , Qnil );
1004+ }
10061005
10071006static VALUE
10081007convert_string_subclass (VALUE key )
@@ -1019,6 +1018,129 @@ convert_string_subclass(VALUE key)
10191018 return key_to_s ;
10201019}
10211020
1021+ static bool enc_utf8_compatible_p (int enc_idx )
1022+ {
1023+ if (enc_idx == usascii_encindex ) return true;
1024+ if (enc_idx == utf8_encindex ) return true;
1025+ return false;
1026+ }
1027+
1028+ static VALUE encode_json_string_try (VALUE str )
1029+ {
1030+ return rb_funcall (str , i_encode , 1 , Encoding_UTF_8 );
1031+ }
1032+
1033+ static VALUE encode_json_string_rescue (VALUE str , VALUE exception )
1034+ {
1035+ raise_generator_error_str (str , rb_funcall (exception , rb_intern ("message" ), 0 ));
1036+ return Qundef ;
1037+ }
1038+
1039+ static inline bool valid_json_string_p (VALUE str )
1040+ {
1041+ int coderange = rb_enc_str_coderange (str );
1042+
1043+ if (RB_LIKELY (coderange == ENC_CODERANGE_7BIT )) {
1044+ return true;
1045+ }
1046+
1047+ if (RB_LIKELY (coderange == ENC_CODERANGE_VALID )) {
1048+ return enc_utf8_compatible_p (RB_ENCODING_GET_INLINED (str ));
1049+ }
1050+
1051+ return false;
1052+ }
1053+
1054+ static inline VALUE ensure_valid_encoding (struct generate_json_data * data , VALUE str , bool as_json_called , bool is_key )
1055+ {
1056+ if (RB_LIKELY (valid_json_string_p (str ))) {
1057+ return str ;
1058+ }
1059+
1060+ if (!as_json_called && data -> state -> strict && RTEST (data -> state -> as_json )) {
1061+ VALUE coerced_str = json_call_as_json (data -> state , str , Qfalse );
1062+ if (coerced_str != str ) {
1063+ if (RB_TYPE_P (coerced_str , T_STRING )) {
1064+ if (!valid_json_string_p (coerced_str )) {
1065+ raise_generator_error (str , "source sequence is illegal/malformed utf-8" );
1066+ }
1067+ } else {
1068+ // as_json could return another type than T_STRING
1069+ if (is_key ) {
1070+ raise_generator_error (coerced_str , "%" PRIsVALUE " not allowed as object key in JSON" , CLASS_OF (coerced_str ));
1071+ }
1072+ }
1073+
1074+ return coerced_str ;
1075+ }
1076+ }
1077+
1078+ if (RB_ENCODING_GET_INLINED (str ) == binary_encindex ) {
1079+ VALUE utf8_string = rb_enc_associate_index (rb_str_dup (str ), utf8_encindex );
1080+ switch (rb_enc_str_coderange (utf8_string )) {
1081+ case ENC_CODERANGE_7BIT :
1082+ return utf8_string ;
1083+ case ENC_CODERANGE_VALID :
1084+ // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1085+ // TODO: Raise in 3.0.0
1086+ rb_warn ("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0" );
1087+ return utf8_string ;
1088+ break ;
1089+ }
1090+ }
1091+
1092+ return rb_rescue (encode_json_string_try , str , encode_json_string_rescue , str );
1093+ }
1094+
1095+ static void raw_generate_json_string (FBuffer * buffer , struct generate_json_data * data , VALUE obj )
1096+ {
1097+ fbuffer_append_char (buffer , '"' );
1098+
1099+ long len ;
1100+ search_state search ;
1101+ search .buffer = buffer ;
1102+ RSTRING_GETMEM (obj , search .ptr , len );
1103+ search .cursor = search .ptr ;
1104+ search .end = search .ptr + len ;
1105+
1106+ #ifdef HAVE_SIMD
1107+ search .matches_mask = 0 ;
1108+ search .has_matches = false;
1109+ search .chunk_base = NULL ;
1110+ #endif /* HAVE_SIMD */
1111+
1112+ switch (rb_enc_str_coderange (obj )) {
1113+ case ENC_CODERANGE_7BIT :
1114+ case ENC_CODERANGE_VALID :
1115+ if (RB_UNLIKELY (data -> state -> ascii_only )) {
1116+ convert_UTF8_to_ASCII_only_JSON (& search , data -> state -> script_safe ? script_safe_escape_table : ascii_only_escape_table );
1117+ } else if (RB_UNLIKELY (data -> state -> script_safe )) {
1118+ convert_UTF8_to_script_safe_JSON (& search );
1119+ } else {
1120+ convert_UTF8_to_JSON (& search );
1121+ }
1122+ break ;
1123+ default :
1124+ raise_generator_error (obj , "source sequence is illegal/malformed utf-8" );
1125+ break ;
1126+ }
1127+ fbuffer_append_char (buffer , '"' );
1128+ }
1129+
1130+ static void generate_json_string (FBuffer * buffer , struct generate_json_data * data , VALUE obj )
1131+ {
1132+ obj = ensure_valid_encoding (data , obj , false, false);
1133+ raw_generate_json_string (buffer , data , obj );
1134+ }
1135+
1136+ struct hash_foreach_arg {
1137+ VALUE hash ;
1138+ struct generate_json_data * data ;
1139+ int first_key_type ;
1140+ bool first ;
1141+ bool mixed_keys_encountered ;
1142+ };
1143+
10221144NOINLINE ()
10231145static void
10241146json_inspect_hash_with_mixed_keys (struct hash_foreach_arg * arg )
@@ -1035,13 +1157,6 @@ json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
10351157 }
10361158}
10371159
1038- static VALUE
1039- json_call_as_json (JSON_Generator_State * state , VALUE object , VALUE is_key )
1040- {
1041- VALUE proc_args [2 ] = {object , is_key };
1042- return rb_proc_call_with_block (state -> as_json , 2 , proc_args , Qnil );
1043- }
1044-
10451160static int
10461161json_object_i (VALUE key , VALUE val , VALUE _arg )
10471162{
@@ -1107,8 +1222,10 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
11071222 break ;
11081223 }
11091224
1225+ key_to_s = ensure_valid_encoding (data , key_to_s , as_json_called , true);
1226+
11101227 if (RB_LIKELY (RBASIC_CLASS (key_to_s ) == rb_cString )) {
1111- generate_json_string (buffer , data , key_to_s );
1228+ raw_generate_json_string (buffer , data , key_to_s );
11121229 } else {
11131230 generate_json (buffer , data , key_to_s );
11141231 }
@@ -1191,85 +1308,6 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
11911308 fbuffer_append_char (buffer , ']' );
11921309}
11931310
1194- static inline int enc_utf8_compatible_p (int enc_idx )
1195- {
1196- if (enc_idx == usascii_encindex ) return 1 ;
1197- if (enc_idx == utf8_encindex ) return 1 ;
1198- return 0 ;
1199- }
1200-
1201- static VALUE encode_json_string_try (VALUE str )
1202- {
1203- return rb_funcall (str , i_encode , 1 , Encoding_UTF_8 );
1204- }
1205-
1206- static VALUE encode_json_string_rescue (VALUE str , VALUE exception )
1207- {
1208- raise_generator_error_str (str , rb_funcall (exception , rb_intern ("message" ), 0 ));
1209- return Qundef ;
1210- }
1211-
1212- static inline VALUE ensure_valid_encoding (VALUE str )
1213- {
1214- int encindex = RB_ENCODING_GET (str );
1215- VALUE utf8_string ;
1216- if (RB_UNLIKELY (!enc_utf8_compatible_p (encindex ))) {
1217- if (encindex == binary_encindex ) {
1218- utf8_string = rb_enc_associate_index (rb_str_dup (str ), utf8_encindex );
1219- switch (rb_enc_str_coderange (utf8_string )) {
1220- case ENC_CODERANGE_7BIT :
1221- return utf8_string ;
1222- case ENC_CODERANGE_VALID :
1223- // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1224- // TODO: Raise in 3.0.0
1225- rb_warn ("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0" );
1226- return utf8_string ;
1227- break ;
1228- }
1229- }
1230-
1231- str = rb_rescue (encode_json_string_try , str , encode_json_string_rescue , str );
1232- }
1233- return str ;
1234- }
1235-
1236- static void generate_json_string (FBuffer * buffer , struct generate_json_data * data , VALUE obj )
1237- {
1238- obj = ensure_valid_encoding (obj );
1239-
1240- fbuffer_append_char (buffer , '"' );
1241-
1242- long len ;
1243- search_state search ;
1244- search .buffer = buffer ;
1245- RSTRING_GETMEM (obj , search .ptr , len );
1246- search .cursor = search .ptr ;
1247- search .end = search .ptr + len ;
1248-
1249- #ifdef HAVE_SIMD
1250- search .matches_mask = 0 ;
1251- search .has_matches = false;
1252- search .chunk_base = NULL ;
1253- #endif /* HAVE_SIMD */
1254-
1255- switch (rb_enc_str_coderange (obj )) {
1256- case ENC_CODERANGE_7BIT :
1257- case ENC_CODERANGE_VALID :
1258- if (RB_UNLIKELY (data -> state -> ascii_only )) {
1259- convert_UTF8_to_ASCII_only_JSON (& search , data -> state -> script_safe ? script_safe_escape_table : ascii_only_escape_table );
1260- } else if (RB_UNLIKELY (data -> state -> script_safe )) {
1261- convert_UTF8_to_script_safe_JSON (& search );
1262- } else {
1263- convert_UTF8_to_JSON (& search );
1264- }
1265- break ;
1266- default :
1267- raise_generator_error (obj , "source sequence is illegal/malformed utf-8" );
1268- break ;
1269- }
1270- fbuffer_append_char (buffer , '"' );
1271- }
1272-
12731311static void generate_json_fallback (FBuffer * buffer , struct generate_json_data * data , VALUE obj )
12741312{
12751313 VALUE tmp ;
@@ -1408,7 +1446,16 @@ static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALU
14081446 break ;
14091447 case T_STRING :
14101448 if (klass != rb_cString ) goto general ;
1411- generate_json_string (buffer , data , obj );
1449+
1450+ if (RB_LIKELY (valid_json_string_p (obj ))) {
1451+ raw_generate_json_string (buffer , data , obj );
1452+ } else if (as_json_called ) {
1453+ raise_generator_error (obj , "source sequence is illegal/malformed utf-8" );
1454+ } else {
1455+ obj = ensure_valid_encoding (data , obj , false, false);
1456+ as_json_called = true;
1457+ goto start ;
1458+ }
14121459 break ;
14131460 case T_SYMBOL :
14141461 generate_json_symbol (buffer , data , obj );
0 commit comments