Skip to content

Commit 8ffe269

Browse files
authored
refactor lexer and parser (#19)
1 parent 72047eb commit 8ffe269

22 files changed

+3602
-2829
lines changed

.iex.exs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ Application.put_env(:sql, SQL.Repo, username: "postgres", password: "postgres",
99
Mix.Tasks.Ecto.Create.run(["-r", "SQL.Repo"])
1010
SQL.Repo.start_link()
1111
import SQL
12+
alias SQL.BNF

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,18 @@
55

66
# Changelog
77

8+
## v0.3.0 (2025-08-01)
9+
10+
### Enhancement
11+
- Improve SQL generation performance by 57-344x compared to Ecto [#12](https://github.com/elixir-dbvisor/sql/pull/12).
12+
- Fix bug for complex CTE [#15](https://github.com/elixir-dbvisor/sql/pull/15). Thanks to @kafaichoi
13+
- Support for PostgreSQL GiST operators [#18](https://github.com/elixir-dbvisor/sql/pull/18). Thanks to @ibarchenkov
14+
- `float` and `integer` nodes have now become `numeric` with metadata to distinguish `sign`, `whole` and `fractional` [#19](https://github.com/elixir-dbvisor/sql/pull/19).
15+
- `keyword` nodes are now `ident` with metadata distinguishing whether it's a `keyword` [#19](https://github.com/elixir-dbvisor/sql/pull/19).
16+
- `SQL.Lexer.lex/4` now returns `{:ok, context, tokens}` [#19](https://github.com/elixir-dbvisor/sql/pull/19).
17+
- `SQL.Parser.parse/1` has become `SQL.Parser.parse/2` and takes `tokens` and `context` from `SQL.Lexer.lex/4` and returns `{:ok, context, tokens}` or raises an error [#19](https://github.com/elixir-dbvisor/sql/pull/19).
18+
19+
820
## v0.2.0 (2025-05-04)
921

1022
### Enhancement

bench.exs

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,29 @@ Application.put_env(:sql, :ecto_repos, [SQL.Repo])
77
Application.put_env(:sql, SQL.Repo, username: "postgres", password: "postgres", hostname: "localhost", database: "sql_test#{System.get_env("MIX_TEST_PARTITION")}", pool: Ecto.Adapters.SQL.Sandbox, pool_size: 10)
88
SQL.Repo.__adapter__().storage_up(SQL.Repo.config())
99
SQL.Repo.start_link()
10-
range = 1..10_000
10+
sql = ~SQL[with recursive temp (n, fact) as (select 0, 1 union all select n+1, (n+1)*fact from temp where n < 9)]
11+
query = "temp" |> recursive_ctes(true) |> with_cte("temp", as: ^union_all(select("temp", [t], %{n: 0, fact: 1}), ^where(select("temp", [t], [t.n+1, t.n+1*t.fact]), [t], t.n < 9))) |> select([t], [t.n])
12+
result = Tuple.to_list(SQL.Lexer.lex("with recursive temp (n, fact) as (select 0, 1 union all select n+1, (n+1)*fact from temp where n < 9)", __ENV__.file))
13+
tokens = Enum.at(result, -1)
14+
context = Enum.at(result, 1)
1115
Benchee.run(
1216
%{
13-
"to_string" => fn -> for _ <- range, do: to_string(~SQL[with recursive temp (n, fact) as (select 0, 1 union all select n+1, (n+1)*fact from temp where n < 9)]) end,
14-
"to_sql" => fn -> for _ <- range, do: SQL.to_sql(~SQL[with recursive temp (n, fact) as (select 0, 1 union all select n+1, (n+1)*fact from temp where n < 9)]) end,
15-
"inspect" => fn -> for _ <- range, do: inspect(~SQL[with recursive temp (n, fact) as (select 0, 1 union all select n+1, (n+1)*fact from temp where n < 9)]) end,
16-
"ecto" => fn -> for _ <- range, do: SQL.Repo.to_sql(:all, "temp" |> recursive_ctes(true) |> with_cte("temp", as: ^union_all(select("temp", [t], %{n: 0, fact: 1}), ^where(select("temp", [t], [t.n+1, t.n+1*t.fact]), [t], t.n < 9))) |> select([t], [t.n])) end
17+
"comptime to_string" => fn _ -> to_string(sql) end,
18+
"comptime to_sql" => fn _ -> SQL.to_sql(sql) end,
19+
"comptime inspect" => fn _ -> inspect(sql) end,
20+
"comptime ecto" => fn _ -> SQL.Repo.to_sql(:all, query) end,
21+
"lex" => fn _ -> SQL.Lexer.lex("with recursive temp (n, fact) as (select 0, 1 union all select n+1, (n+1)*fact from temp where n < 9)", __ENV__.file) end,
22+
"parse" => fn _ -> SQL.Parser.parse(tokens, context) end,
23+
"runtime to_string" => fn _ -> to_string(~SQL[with recursive temp (n, fact) as (select 0, 1 union all select n+1, (n+1)*fact from temp where n < 9)]) end,
24+
"runtime to_sql" => fn _ -> SQL.to_sql(~SQL[with recursive temp (n, fact) as (select 0, 1 union all select n+1, (n+1)*fact from temp where n < 9)]) end,
25+
"runtime inspect" => fn _ -> inspect(~SQL[with recursive temp (n, fact) as (select 0, 1 union all select n+1, (n+1)*fact from temp where n < 9)]) end,
26+
"runtime ecto" => fn _ -> SQL.Repo.to_sql(:all, "temp" |> recursive_ctes(true) |> with_cte("temp", as: ^union_all(select("temp", [t], %{n: 0, fact: 1}), ^where(select("temp", [t], [t.n+1, t.n+1*t.fact]), [t], t.n < 9))) |> select([t], [t.n])) end
1727
},
18-
time: 10,
19-
memory_time: 2
28+
inputs: %{
29+
"Small" => Enum.to_list(1..1_000),
30+
"Medium" => Enum.to_list(1..10_000),
31+
"Bigger" => Enum.to_list(1..100_000)
32+
},
33+
memory_time: 2,
34+
reduction_time: 2
2035
)

lib/adapters/ansi.ex

Lines changed: 51 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,31 +11,44 @@ defmodule SQL.Adapters.ANSI do
1111

1212
@doc false
1313
def token_to_string(value, mod \\ __MODULE__)
14-
def token_to_string(value, mod) when is_struct(value) do
15-
to_string(%{value | module: mod})
14+
def token_to_string(value, _mod) when is_struct(value) do
15+
to_string(value)
1616
end
17-
def token_to_string({tag, _, [{:parens, _, _} = value]}, mod) when tag in ~w[integer float update]a do
17+
def token_to_string({:*, _, []}, _mod) do
18+
"*"
19+
end
20+
def token_to_string({:fun, _, [left, right]}, mod) do
21+
"#{mod.token_to_string(left)}#{mod.token_to_string(right)}"
22+
end
23+
def token_to_string({tag, _, [{:paren, _, _} = value]}, mod) when tag in ~w[numeric update]a do
1824
"#{mod.token_to_string(tag)}#{mod.token_to_string(value)}"
1925
end
20-
def token_to_string({tag, _, value}, _mod) when tag in ~w[ident integer float]a do
21-
"#{value}"
26+
def token_to_string({:ident, [{:keyword, :non_reserved},{:tag, tag}|_], [{:paren, _, _} = value]}, mod) do
27+
"#{mod.token_to_string(tag)}#{mod.token_to_string(value)}"
2228
end
23-
def token_to_string({tag, _}, mod) do
24-
mod.token_to_string(tag)
29+
def token_to_string({:ident, [{:keyword, :non_reserved}, {:tag, tag}|_], [{:numeric, _, _} = value]}, mod) do
30+
"#{mod.token_to_string(tag)} #{mod.token_to_string(value)}"
31+
end
32+
def token_to_string({_tag, [{:keyword, :non_reserved}|_], value}, mod) do
33+
"#{mod.token_to_string(value)}"
34+
end
35+
def token_to_string({:numeric = tag, _, []}, mod), do: mod.token_to_string(tag)
36+
def token_to_string({tag, _, value}, _mod) when tag in ~w[ident numeric]a do
37+
value
2538
end
2639
def token_to_string({:comment, _, value}, _mod) do
27-
"-- #{value}"
40+
"--#{value}"
2841
end
2942
def token_to_string({:comments, _, value}, _mod) do
30-
"\\* #{value} *\\"
43+
"\\*#{value}*\\"
3144
end
3245
def token_to_string({:double_quote, _, value}, _mod) do
3346
"\"#{value}\""
3447
end
3548
def token_to_string({:quote, _, value}, _mod) do
3649
"'#{value}'"
3750
end
38-
def token_to_string({:parens, _, value}, mod) do
51+
def token_to_string({:paren, _, value}, mod) do
3952
"(#{mod.token_to_string(value)})"
4053
end
4154
def token_to_string({:bracket, _, value}, mod) do
@@ -47,21 +60,36 @@ defmodule SQL.Adapters.ANSI do
4760
def token_to_string({:comma, _, value}, mod) do
4861
", #{mod.token_to_string(value)}"
4962
end
63+
def token_to_string({:dot, _, [left, right]}, mod) do
64+
"#{mod.token_to_string(left)}.#{mod.token_to_string(right)}"
65+
end
5066
def token_to_string({tag, _, []}, mod) do
5167
mod.token_to_string(tag)
5268
end
53-
def token_to_string({tag, _, [[_ | _] = left, right]}, mod) when tag in ~w[join]a do
69+
def token_to_string({:join=tag, _, [right]}, mod) do
70+
"#{mod.token_to_string(tag)} #{mod.token_to_string(right)}"
71+
end
72+
def token_to_string({:join=tag, _, [{t, [{:keyword, :reserved}|_], _}=p, p1, p2, right]}, mod) when t != :as do
73+
"#{mod.token_to_string(p)} #{mod.token_to_string(p1)} #{mod.token_to_string(p2)} #{mod.token_to_string(tag)} #{mod.token_to_string(right)}"
74+
end
75+
def token_to_string({:join=tag, _, [{t, [{:keyword, :reserved}|_], _}=p, p1, right]}, mod) when t != :as do
76+
"#{mod.token_to_string(p)} #{mod.token_to_string(p1)} #{mod.token_to_string(tag)} #{mod.token_to_string(right)}"
77+
end
78+
def token_to_string({:join=tag, _, [{t, [{:keyword, :reserved}|_], _}=left, right]}, mod) when t != :as do
5479
"#{mod.token_to_string(left)} #{mod.token_to_string(tag)} #{mod.token_to_string(right)}"
5580
end
5681
def token_to_string({tag, _, [{:with = t, _, [left, right]}]}, mod) when tag in ~w[to]a do
5782
"#{mod.token_to_string(tag)} #{mod.token_to_string(left)} #{mod.token_to_string(t)} #{mod.token_to_string(right)}"
5883
end
59-
def token_to_string({tag, _, value}, mod) when tag in ~w[select from fetch limit where order offset group having with join by distinct create type drop insert alter table add into delete update start grant revoke set declare open close commit rollback references recursive]a do
84+
def token_to_string({tag, _, value}, mod) when tag in ~w[select from fetch limit where order offset group having with join by distinct create type drop insert alter table add into delete update start grant revoke set declare open close commit rollback references recursive outer]a do
6085
"#{mod.token_to_string(tag)} #{mod.token_to_string(value)}"
6186
end
6287
def token_to_string({:on = tag, _, [source, as, value]}, mod) do
6388
"#{mod.token_to_string(source)} #{mod.token_to_string(as)} #{mod.token_to_string(tag)} #{mod.token_to_string(value)}"
6489
end
90+
def token_to_string({:not = tag, _, [ident | values]}, mod) when values != [] do
91+
"#{mod.token_to_string(ident)} #{mod.token_to_string(tag)} #{mod.token_to_string(values)}"
92+
end
6593
def token_to_string({tag, _, [left, [{:all = t, _, right}]]}, mod) when tag in ~w[union except intersect]a do
6694
"#{mod.token_to_string(left)} #{mod.token_to_string(tag)} #{mod.token_to_string(t)} #{mod.token_to_string(right)}"
6795
end
@@ -71,19 +99,13 @@ defmodule SQL.Adapters.ANSI do
7199
def token_to_string({tag, _, [left, right]}, mod) when tag in ~w[:: [\] <> <= >= != || + - ^ * / % < > = like ilike as union except intersect between and or on is not in cursor for to]a do
72100
"#{mod.token_to_string(left)} #{mod.token_to_string(tag)} #{mod.token_to_string(right)}"
73101
end
74-
def token_to_string({tag, _, [{:parens, _, _} = value]}, mod) when tag not in ~w[in on]a do
102+
def token_to_string({tag, _, [{:paren, _, _} = value]}, mod) when tag not in ~w[in on]a do
75103
"#{mod.token_to_string(tag)}#{mod.token_to_string(value)}"
76104
end
77-
def token_to_string({tag, _, values}, mod) when tag in ~w[not all between symmetric absolute relative forward backward on in for without]a do
105+
def token_to_string({tag, _, values}, mod) when tag in ~w[not all between asymmetric symmetric absolute relative forward backward on in for without]a do
78106
"#{mod.token_to_string(tag)} #{mod.token_to_string(values)}"
79107
end
80-
def token_to_string({tag, _, [left, right]}, mod) when tag in ~w[.]a do
81-
"#{mod.token_to_string(left)}.#{mod.token_to_string(right)}"
82-
end
83-
def token_to_string({tag, _, [left]}, mod) when tag in ~w[not]a do
84-
"#{mod.token_to_string(left)} #{mod.token_to_string(tag)}"
85-
end
86-
def token_to_string({tag, _, [left]}, mod) when tag in ~w[asc desc isnull notnull]a do
108+
def token_to_string({tag, _, [left]}, mod) when tag in ~w[asc desc isnull notnull not]a do
87109
"#{mod.token_to_string(left)} #{mod.token_to_string(tag)}"
88110
end
89111
def token_to_string({:binding, _, [idx]}, _mod) when is_integer(idx) do
@@ -92,7 +114,7 @@ defmodule SQL.Adapters.ANSI do
92114
def token_to_string({:binding, _, value}, _mod) do
93115
"{{#{value}}}"
94116
end
95-
def token_to_string(:asterisk, _mod) do
117+
def token_to_string(:*, _mod) do
96118
"*"
97119
end
98120
def token_to_string(value, _mod) when is_atom(value) do
@@ -101,12 +123,15 @@ defmodule SQL.Adapters.ANSI do
101123
def token_to_string(value, _mod) when is_binary(value) do
102124
"'#{value}'"
103125
end
104-
def token_to_string(values, mod) when is_list(values) do
126+
def token_to_string([h|_]=values, mod) when is_tuple(h) or is_tuple(hd(h)) do
105127
values
106128
|> Enum.reduce([], fn
107-
token, [] = acc -> [acc | mod.token_to_string(token)]
108-
{:comma, _, _} = token, acc -> [acc | mod.token_to_string(token)]
109-
token, acc -> [acc, " " | mod.token_to_string(token)]
129+
token, [] = acc -> [acc,mod.token_to_string(token, mod)]
130+
{:comma, _, _} = token, acc -> [acc,mod.token_to_string(token, mod)]
131+
token, acc -> [acc," ",mod.token_to_string(token, mod)]
110132
end)
111133
end
134+
def token_to_string(value, _mod) do
135+
value
136+
end
112137
end

lib/adapters/postgres.ex

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,8 @@ defmodule SQL.Adapters.Postgres do
1414
def token_to_string({:not, _, [left, {:in, _, [{:binding, _, _} = right]}]}, mod), do: "#{mod.token_to_string(left)} != ANY(#{mod.token_to_string(right)})"
1515
def token_to_string({:in, _, [left, {:binding, _, _} = right]}, mod), do: "#{mod.token_to_string(left)} = ANY(#{mod.token_to_string(right)})"
1616
def token_to_string({:binding, _, [idx]}, _mod) when is_integer(idx), do: "$#{idx}"
17+
def token_to_string({tag, _, [left, right]}, mod) when tag in ~w[>>=]a do
18+
"#{mod.token_to_string(left)} #{mod.token_to_string(tag)} #{mod.token_to_string(right)}"
19+
end
1720
def token_to_string(token, mod), do: SQL.Adapters.ANSI.token_to_string(token, mod)
1821
end

0 commit comments

Comments
 (0)