Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Always use optimal encoding function #512

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 45 additions & 9 deletions lib/floki/entities.ex
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,51 @@ defmodule Floki.Entities do
* greater-than sign - > - is replaced by "&gt;".

All other symbols are going to remain the same.

Optimized IO data implementation from Plug.HTML
"""
@spec encode(String.t()) :: String.t()
def encode(string) when is_binary(string) do
String.replace(string, ["'", "\"", "&", "<", ">"], fn
"'" -> "&#39;"
"\"" -> "&quot;"
"&" -> "&amp;"
"<" -> "&lt;"
">" -> "&gt;"
end)
@spec encode(iodata()) :: iodata()
# Public entry point. A binary is scanned by the private encode/4 and encode/5
# clauses below, starting at byte offset 0 with an empty accumulator.
def encode(string) when is_binary(string), do: encode(string, 0, string, [])
# Any other input is first flattened with IO.iodata_to_binary/1 and then
# handled by the binary clause above.
def encode(data), do: encode(IO.iodata_to_binary(data))

# Bytes that must be replaced, each paired with its replacement text.
# This list is consumed at compile time by the two `for` blocks below.
escapes = [
{?<, "&lt;"},
{?>, "&gt;"},
{?&, "&amp;"},
{?", "&quot;"},
{?', "&#39;"}
]

# encode/4 — state with no pending run of literal bytes.
# Arguments: the remaining input, `skip` (byte offset into `original` of the
# first byte of the remaining input), the untouched `original` binary, and the
# accumulator `acc`.
# One clause is generated per `escapes` entry: the matched byte is consumed
# (skip + 1) and its replacement is attached to `acc` as the tail of an
# improper list.
for {match, insert} <- escapes do
defp encode(<<unquote(match), rest::bits>>, skip, original, acc) do
encode(rest, skip + 1, original, [acc | unquote(insert)])
end
end

# Any other byte starts a run of literal bytes: switch to encode/5 with a run
# length of 1. `skip` is left unchanged so it marks where the run begins.
defp encode(<<_char, rest::bits>>, skip, original, acc) do
encode(rest, skip, original, acc, 1)
end

# End of input with no pending run: the accumulator is the result.
defp encode(<<>>, _skip, _original, acc) do
acc
end

# encode/5 — state with a pending run of `len` literal bytes that starts at
# offset `skip` in `original`.
# One clause is generated per `escapes` entry: the pending run is sliced out of
# `original` with binary_part/3 and appended together with the replacement,
# then scanning resumes in encode/4 just past the run and the escaped byte.
for {match, insert} <- escapes do
defp encode(<<unquote(match), rest::bits>>, skip, original, acc, len) do
part = binary_part(original, skip, len)
encode(rest, skip + len + 1, original, [acc, part | unquote(insert)])
end
end

# Another literal byte: extend the pending run by one.
defp encode(<<_char, rest::bits>>, skip, original, acc, len) do
encode(rest, skip, original, acc, len + 1)
end

# End of input while the run still starts at offset 0: no escapable byte was
# consumed (each one advances `skip`), so `original` is returned as-is.
# This clause must stay before the general end-of-input clause below.
defp encode(<<>>, 0, original, _acc, _len) do
original
end

# End of input with a pending run after at least one replacement: attach the
# trailing slice of `original` as the tail of the accumulator.
defp encode(<<>>, skip, original, acc, len) do
[acc | binary_part(original, skip, len)]
end
end
53 changes: 1 addition & 52 deletions lib/floki/raw_html.ex
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,7 @@ defmodule Floki.RawHTML do
end

defp build_attrs({attr, value}, encoder) do
if encoder == @encoder do
[attr, "=\"", html_escape(value) | "\""]
else
[attr, "=\"", value | "\""]
end
[attr, "=\"", encoder.(value) | "\""]
end

defp build_attrs(attr, _encoder), do: attr
Expand Down Expand Up @@ -164,53 +160,6 @@ defmodule Floki.RawHTML do
end
end

# html_escape
# Optimized IO data implementation from Plug.HTML

# Entry point. A binary is scanned by the html_escape/4 and html_escape/5
# clauses below, starting at byte offset 0 with an empty accumulator.
defp html_escape(data) when is_binary(data), do: html_escape(data, 0, data, [])
# Any other input is first flattened with IO.iodata_to_binary/1.
defp html_escape(data), do: html_escape(IO.iodata_to_binary(data))

# Bytes that must be replaced, each paired with its replacement text.
# This list is consumed at compile time by the two `for` blocks below.
escapes = [
{?<, "&lt;"},
{?>, "&gt;"},
{?&, "&amp;"},
{?", "&quot;"},
{?', "&#39;"}
]

# html_escape/4 — state with no pending run of literal bytes. `skip` is the
# byte offset into `original` of the first byte of the remaining input.
# One clause is generated per `escapes` entry: consume the byte (skip + 1) and
# attach its replacement to `acc` as the tail of an improper list.
for {match, insert} <- escapes do
defp html_escape(<<unquote(match), rest::bits>>, skip, original, acc) do
html_escape(rest, skip + 1, original, [acc | unquote(insert)])
end
end

# Any other byte starts a run of literal bytes: switch to html_escape/5 with a
# run length of 1, leaving `skip` at the start of the run.
defp html_escape(<<_char, rest::bits>>, skip, original, acc) do
html_escape(rest, skip, original, acc, 1)
end

# End of input with no pending run: the accumulator is the result.
defp html_escape(<<>>, _skip, _original, acc) do
acc
end

# html_escape/5 — state with a pending run of `len` literal bytes starting at
# offset `skip`. On an escapable byte, the run is sliced out of `original`
# with binary_part/3 and appended together with the replacement; scanning then
# resumes in html_escape/4 just past the run and the escaped byte.
for {match, insert} <- escapes do
defp html_escape(<<unquote(match), rest::bits>>, skip, original, acc, len) do
part = binary_part(original, skip, len)
html_escape(rest, skip + len + 1, original, [acc, part | unquote(insert)])
end
end

# Another literal byte: extend the pending run by one.
defp html_escape(<<_char, rest::bits>>, skip, original, acc, len) do
html_escape(rest, skip, original, acc, len + 1)
end

# End of input while the run still starts at offset 0: no escapable byte was
# consumed, so `original` is returned as-is. Must precede the clause below.
defp html_escape(<<>>, 0, original, _acc, _len) do
original
end

# End of input with a pending run after at least one replacement: attach the
# trailing slice of `original` as the tail of the accumulator.
defp html_escape(<<>>, skip, original, acc, len) do
[acc | binary_part(original, skip, len)]
end

# helpers

# TODO: Use Enum.map_intersperse/3 when we require Elixir v1.10+
Expand Down
10 changes: 5 additions & 5 deletions test/floki/entities_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,23 @@ defmodule Floki.EntitiesTest do

describe "encode/1" do
test "encode single-quote" do
assert Entities.encode("'") == "&#39;"
assert IO.iodata_to_binary(Entities.encode("'")) == "&#39;"
end

test "encode double-quote" do
assert Entities.encode("\"") == "&quot;"
assert IO.iodata_to_binary(Entities.encode("\"")) == "&quot;"
end

test "ampersand" do
assert Entities.encode("&") == "&amp;"
assert IO.iodata_to_binary(Entities.encode("&")) == "&amp;"
end

test "encode less-than sign" do
assert Entities.encode("<") == "&lt;"
assert IO.iodata_to_binary(Entities.encode("<")) == "&lt;"
end

test "encode greater-than sign" do
assert Entities.encode(">") == "&gt;"
assert IO.iodata_to_binary(Entities.encode(">")) == "&gt;"
end

test "does not encode others" do
Expand Down
Loading