@@ -11,7 +11,6 @@ defmodule DiffWeb.LiveView do
1111 DiffWeb.TooLargeComponent . render ( % { file: file_path } )
1212 |> Phoenix.HTML.Safe . to_iodata ( )
1313 |> IO . iodata_to_binary ( )
14- |> sanitize_utf8 ( )
1514
1615 { :ok , % { "diff" => raw_diff , "path_from" => path_from , "path_to" => path_to } } ->
1716 case GitDiff . parse_patch ( raw_diff , relative_from: path_from , relative_to: path_to ) do
@@ -38,42 +37,16 @@ defmodule DiffWeb.LiveView do
3837 end
3938 end
4039
41- defp sanitize_utf8 ( content ) when is_binary ( content ) do
42- case String . valid? ( content ) do
43- true ->
44- content
45-
46- false ->
47- # Multiple fallback strategies for invalid UTF-8
48- sanitize_invalid_bytes ( content )
49- end
50- end
51-
52- defp sanitize_utf8 ( content ) , do: content
53-
54- defp sanitize_invalid_bytes ( content ) do
55- # Try different encoding conversions and fallbacks
56- cond do
57- # Try converting from Latin-1/ISO-8859-1 encoding
58- latin1_result = safe_unicode_convert ( content , :latin1 , :utf8 ) ->
59- latin1_result
60-
61- # Last resort: replace invalid bytes with replacement character
62- true ->
63- content
64- |> :binary . bin_to_list ( )
65- # Replace high bytes with '?'
66- |> Enum . map ( fn byte -> if byte > 127 , do: 63 , else: byte end )
67- |> :binary . list_to_bin ( )
68- end
69- end
70-
71- defp safe_unicode_convert ( content , from , to ) do
72- case :unicode . characters_to_binary ( content , from , to ) do
73- result when is_binary ( result ) -> result
74- _ -> nil
75- end
76- rescue
77- _ -> nil
@doc """
Replaces every byte that is not part of valid UTF-8 with `"?"`.

The input is split into alternating runs of valid and invalid data with
`String.chunk/2`. Valid runs are kept untouched; each invalid run is replaced
by one `"?"` per byte, so the result has the same `byte_size/1` as the input
and is always a valid UTF-8 string.

Only binaries are accepted; any other term raises `FunctionClauseError`.
"""
@spec sanitize_utf8(binary()) :: String.t()
def sanitize_utf8(content) when is_binary(content) do
  content
  |> String.chunk(:valid)
  # map_join/3 maps and joins in a single pass, avoiding the intermediate
  # list that Enum.map/2 followed by Enum.join/2 would build.
  |> Enum.map_join("", fn chunk ->
    if String.valid?(chunk) do
      chunk
    else
      # One "?" per invalid byte keeps the output length equal to the input.
      String.duplicate("?", byte_size(chunk))
    end
  end)
end
7952end
0 commit comments