@@ -1023,55 +1023,93 @@ is_unnecessary_quote(_Parts, _Scope) ->
1023
1023
%% Converts the characters of an atom token (a binary or a charlist) into an
%% atom, or into whatever term the configured static atoms encoder returns.
%%
%% Returns {ok, AtomOrTerm} on success, or {error, {Location, Prefix, Token}}
%% when:
%%   * the input is longer than 255 bytes (binary) or 255 elements (list);
%%   * the static atoms encoder answers {error, Reason} with a binary Reason;
%%   * the atom does not exist and existing_atoms_only is set;
%%   * the BIF conversion fails with badarg;
%%   * the characters raise an Elixir UnicodeConversionError while being
%%     converted (reported with the "invalid encoding in atom" prefix).
%%
%% Any other exception, and any other encoder return value, is left to crash.
unsafe_to_atom(Part, Line, Column, #elixir_tokenizer{}) when
    is_binary(Part) andalso byte_size(Part) > 255;
    is_list(Part) andalso length(Part) > 255 ->
  case atom_chars_to_list(Part, Line, Column) of
    {ok, PartList} ->
      {error, {?LOC(Line, Column), " atom length must be less than system limit: ", PartList}};
    {error, _} = EncodingError ->
      EncodingError
  end;
unsafe_to_atom(Part, Line, Column, #elixir_tokenizer{static_atoms_encoder = StaticAtomsEncoder}) when
    is_function(StaticAtomsEncoder) ->
  case atom_chars_to_binary(Part, Line, Column) of
    {ok, Value} ->
      encode_static_atom(StaticAtomsEncoder, Value, Part, Line, Column);
    {error, _} = EncodingError ->
      EncodingError
  end;
unsafe_to_atom(Binary, Line, Column, #elixir_tokenizer{existing_atoms_only = true}) when is_binary(Binary) ->
  try
    {ok, binary_to_existing_atom(Binary, utf8)}
  catch
    error:badarg ->
      atom_badarg_error(Binary, " unsafe atom does not exist: ", Line, Column)
  end;
unsafe_to_atom(Binary, Line, Column, #elixir_tokenizer{}) when is_binary(Binary) ->
  try
    {ok, binary_to_atom(Binary, utf8)}
  catch
    error:badarg ->
      atom_badarg_error(Binary, " invalid atom: ", Line, Column)
  end;
unsafe_to_atom(List, Line, Column, #elixir_tokenizer{existing_atoms_only = true}) when is_list(List) ->
  try
    {ok, list_to_existing_atom(List)}
  catch
    error:badarg ->
      atom_badarg_error(List, " unsafe atom does not exist: ", Line, Column)
  end;
unsafe_to_atom(List, Line, Column, #elixir_tokenizer{}) when is_list(List) ->
  try
    {ok, list_to_atom(List)}
  catch
    error:badarg ->
      atom_badarg_error(List, " invalid atom: ", Line, Column)
  end.

%% Runs the static atoms encoder on the already converted binary Value.
%% The charlist form of Part is computed before the encoder is called, so an
%% encoding error is reported without invoking the encoder at all.
encode_static_atom(Encoder, Value, Part, Line, Column) ->
  case atom_chars_to_list(Part, Line, Column) of
    {ok, ValueList} ->
      case Encoder(Value, [{line, Line}, {column, Column}]) of
        {ok, Term} ->
          {ok, Term};
        {error, Reason} when is_binary(Reason) ->
          {error, {?LOC(Line, Column), elixir_utils:characters_to_list(Reason) ++ " : ", ValueList}}
      end;
    {error, _} = EncodingError ->
      EncodingError
  end.

%% Builds the error returned after a badarg from the atom BIFs. The characters
%% are converted once more through elixir_utils so that an encoding problem is
%% reported as such; otherwise the error carries Prefix and the charlist form.
atom_badarg_error(Binary, Prefix, Line, Column) when is_binary(Binary) ->
  case atom_chars_to_list(Binary, Line, Column) of
    {ok, List} -> {error, {?LOC(Line, Column), Prefix, List}};
    {error, _} = EncodingError -> EncodingError
  end;
atom_badarg_error(List, Prefix, Line, Column) when is_list(List) ->
  % The binary itself is not needed; the conversion only validates the encoding.
  case atom_chars_to_binary(List, Line, Column) of
    {ok, _Binary} -> {error, {?LOC(Line, Column), Prefix, List}};
    {error, _} = EncodingError -> EncodingError
  end.

%% elixir_utils:characters_to_list/1 with UnicodeConversionError turned into
%% an error tuple. Only the conversion call is protected by the try.
atom_chars_to_list(Chars, Line, Column) ->
  try elixir_utils:characters_to_list(Chars) of
    List -> {ok, List}
  catch
    error:#{'__struct__' := 'Elixir.UnicodeConversionError', message := Message} ->
      atom_encoding_error(Message, Line, Column)
  end.

%% elixir_utils:characters_to_binary/1 with UnicodeConversionError turned into
%% an error tuple. Only the conversion call is protected by the try.
atom_chars_to_binary(Chars, Line, Column) ->
  try elixir_utils:characters_to_binary(Chars) of
    Binary -> {ok, Binary}
  catch
    error:#{'__struct__' := 'Elixir.UnicodeConversionError', message := Message} ->
      atom_encoding_error(Message, Line, Column)
  end.

%% The single place where the "invalid encoding" error tuple is built from the
%% message carried by the caught exception.
atom_encoding_error(Message, Line, Column) ->
  {error, {?LOC(Line, Column), " invalid encoding in atom: ", elixir_utils:characters_to_list(Message)}}.
1076
1114
1077
1115
collect_modifiers ([H | T ], Buffer ) when ? is_downcase (H ) or ? is_upcase (H ) or ? is_digit (H ) ->
@@ -1095,7 +1133,12 @@ extract_heredoc_with_interpolation(Line, Column, Scope, Interpol, T, H) ->
1095
1133
{Parts1 , {ShouldWarn , _ }} = lists :mapfoldl (Fun , {false , Line }, Parts0 ),
1096
1134
Parts2 = extract_heredoc_head (Parts1 ),
1097
1135
NewScope = maybe_heredoc_warn (ShouldWarn , Column , InterScope , H ),
1098
- {ok , NewLine , NewColumn , tokens_to_binary (Parts2 ), Rest , NewScope };
1136
+ try
1137
+ {ok , NewLine , NewColumn , tokens_to_binary (Parts2 ), Rest , NewScope }
1138
+ catch
1139
+ error :#{'__struct__' := 'Elixir.UnicodeConversionError' , message := Message } ->
1140
+ {error , interpolation_format (Message , " (for heredoc starting at line ~B )" , [Line ], Line , Column , [H , H , H ], [H , H , H ])}
1141
+ end ;
1099
1142
1100
1143
{error , Reason } ->
1101
1144
{error , interpolation_format (Reason , " (for heredoc starting at line ~B )" , [Line ], Line , Column , [H , H , H ], [H , H , H ])}
@@ -1166,8 +1209,13 @@ unescape_tokens(Tokens, Line, Column, #elixir_tokenizer{unescape=true}) ->
1166
1209
{error , Message , Token } ->
1167
1210
{error , {? LOC (Line , Column ), Message ++ " . Syntax error after: " , Token }}
1168
1211
end ;
1169
- unescape_tokens (Tokens , _Line , _Column , # elixir_tokenizer {unescape = false }) ->
1170
- {ok , tokens_to_binary (Tokens )}.
1212
+ unescape_tokens (Tokens , Line , Column , # elixir_tokenizer {unescape = false }) ->
1213
+ try
1214
+ {ok , tokens_to_binary (Tokens )}
1215
+ catch
1216
+ error :#{'__struct__' := 'Elixir.UnicodeConversionError' , message := Message } ->
1217
+ {error , {? LOC (Line , Column ), " invalid encoding in tokens: " , elixir_utils :characters_to_list (Message )}}
1218
+ end .
1171
1219
1172
1220
tokens_to_binary (Tokens ) ->
1173
1221
[if is_list (Token ) -> elixir_utils :characters_to_binary (Token ); true -> Token end
@@ -1671,7 +1719,14 @@ tokenize_sigil_contents([H | T] = Original, [S | _] = SigilName, Line, Column, S
1671
1719
case elixir_interpolation :extract (Line , Column + 1 , Scope , ? is_downcase (S ), T , sigil_terminator (H )) of
1672
1720
{NewLine , NewColumn , Parts , Rest , NewScope } ->
1673
1721
Indentation = nil ,
1674
- add_sigil_token (SigilName , Line , Column , NewLine , NewColumn , tokens_to_binary (Parts ), Rest , NewScope , Tokens , Indentation , <<H >>);
1722
+ try
1723
+ add_sigil_token (SigilName , Line , Column , NewLine , NewColumn , tokens_to_binary (Parts ), Rest , NewScope , Tokens , Indentation , <<H >>)
1724
+ catch
1725
+ error :#{'__struct__' := 'Elixir.UnicodeConversionError' , message := Message } ->
1726
+ Sigil = [$~ , S , H ],
1727
+ SigilMessage = " (for sigil ~ts starting at line ~B )" , % fresh name: Message is already bound by the catch pattern, so matching it against this string would raise badmatch
1728
+ interpolation_error (Message , [$~ ] ++ SigilName ++ Original , Scope , Tokens , SigilMessage , [Sigil , Line ], Line , Column , [H ], [sigil_terminator (H )]) % NOTE(review): Message is the message field of the caught exception; confirm interpolation_error accepts it as the reason argument
1729
+ end ;
1675
1730
1676
1731
{error , Reason } ->
1677
1732
Sigil = [$~ , S , H ],
0 commit comments