
Commit 7e8802e

update: move ported code
1 parent a382874 commit 7e8802e

File tree

5 files changed (+149, −77 lines)


crates/parser-generator/src/parser_generator/lexer.rs

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 #[macro_use]
 mod generated;
-mod lexer_ported;
+pub mod lexer_ported;
 pub mod parser_error;
 mod util;

crates/parser-generator/src/parser_generator/lexer/lexer_ported.rs

Lines changed: 73 additions & 1 deletion
@@ -1,5 +1,5 @@
 /// Ported sources from PostgreSQL
-use super::{parser_error::ParserError, Lexer, TokenKind, Yylval};
+use super::{parser_error::ParserError, Lexer, Token, TokenKind, Yylval};
 
 pub fn is_highbit_set(c: char) -> u8 {
     (c as u8) & 0x80

@@ -145,3 +145,75 @@ impl Lexer {
         self.yytext()[..yyleng].to_ascii_lowercase()
     }
 }
+
+/// The logic for converting tokens in PostgreSQL's parser.c
+/// ref: https://github.com/postgres/postgres/blob/REL_16_STABLE/src/backend/parser/parser.c#L195
+pub fn init_tokens(tokens: &mut [Token]) {
+    fn next_token_index(tokens: &[Token], i: usize) -> Option<usize> {
+        for (j, token) in tokens.iter().enumerate().skip(i + 1) {
+            match token.kind {
+                TokenKind::C_COMMENT | TokenKind::SQL_COMMENT => continue,
+                _ => return Some(j),
+            }
+        }
+        None
+    }
+
+    for i in 0..tokens.len() - 1 {
+        match &tokens[i].kind {
+            TokenKind::KEYWORD(k) if k == "FORMAT" => {
+                if let Some(j) = next_token_index(tokens, i) {
+                    if tokens[j].kind == TokenKind::KEYWORD("JSON".to_string()) {
+                        tokens[i].kind = TokenKind::KEYWORD("FORMAT_LA".to_string());
+                    }
+                }
+            }
+            TokenKind::KEYWORD(k) if k == "NOT" => {
+                if let Some(j) = next_token_index(tokens, i) {
+                    match &tokens[j].kind {
+                        TokenKind::KEYWORD(k)
+                            if matches!(
+                                k.as_str(),
+                                "BETWEEN" | "IN_P" | "LIKE" | "ILIKE" | "SIMILAR"
+                            ) =>
+                        {
+                            tokens[i].kind = TokenKind::KEYWORD("NOT_LA".to_string());
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            TokenKind::KEYWORD(k) if k == "NULLS_P" => {
+                if let Some(j) = next_token_index(tokens, i) {
+                    match &tokens[j].kind {
+                        TokenKind::KEYWORD(k) if matches!(k.as_str(), "FIRST_P" | "LAST_P") => {
+                            tokens[i].kind = TokenKind::KEYWORD("NULLS_LA".to_string());
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            TokenKind::KEYWORD(k) if k == "WITH" => {
+                if let Some(j) = next_token_index(tokens, i) {
+                    match &tokens[j].kind {
+                        TokenKind::KEYWORD(k) if matches!(k.as_str(), "TIME" | "ORDINALITY") => {
+                            tokens[i].kind = TokenKind::KEYWORD("WITH_LA".to_string());
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            TokenKind::KEYWORD(k) if k == "WITHOUT" => {
+                if let Some(j) = next_token_index(tokens, i) {
+                    match &tokens[j].kind {
+                        TokenKind::KEYWORD(k) if matches!(k.as_str(), "TIME") => {
+                            tokens[i].kind = TokenKind::KEYWORD("WITHOUT_LA".to_string());
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            _ => (),
+        }
+    }
+}
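
To make the lookahead conversion concrete, here is a self-contained sketch of the rewrite init_tokens performs. The Token and TokenKind definitions below are minimal stand-ins for the crate's real types, and the pass is inlined for a single case, so the snippet compiles on its own; it illustrates the technique, not the crate's API.

// Minimal stand-ins for the crate's Token/TokenKind (illustrative only).
#[derive(Debug, Clone, PartialEq)]
#[allow(non_camel_case_types)]
enum TokenKind {
    KEYWORD(String),
    C_COMMENT,
    SQL_COMMENT,
}

struct Token {
    kind: TokenKind,
}

fn main() {
    // Token stream for `... NOT /* comment */ LIKE ...`
    let mut tokens = vec![
        Token { kind: TokenKind::KEYWORD("NOT".to_string()) },
        Token { kind: TokenKind::C_COMMENT },
        Token { kind: TokenKind::KEYWORD("LIKE".to_string()) },
    ];

    // As in next_token_index: find the next non-comment token after index 0.
    let next = (1..tokens.len()).find(|&j| {
        !matches!(tokens[j].kind, TokenKind::C_COMMENT | TokenKind::SQL_COMMENT)
    });

    // NOT followed by BETWEEN/IN_P/LIKE/ILIKE/SIMILAR becomes NOT_LA, so the
    // grammar can tell `NOT LIKE` apart from a bare NOT without extra lookahead.
    if let Some(j) = next {
        if tokens[j].kind == TokenKind::KEYWORD("LIKE".to_string()) {
            tokens[0].kind = TokenKind::KEYWORD("NOT_LA".to_string());
        }
    }

    assert_eq!(tokens[0].kind, TokenKind::KEYWORD("NOT_LA".to_string()));
    println!("NOT was rewritten to NOT_LA across the intervening comment");
}

Note how the comment token between NOT and LIKE is skipped rather than breaking the pattern; this mirrors how PostgreSQL's parser.c performs the same substitution on its raw token stream.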

crates/postgresql-cst-parser/src/cst.rs

Lines changed: 1 addition & 73 deletions
@@ -4,7 +4,7 @@ use cstree::{
 use miniz_oxide::inflate::decompress_to_vec;
 
 use crate::{
-    lexer::{lex, parser_error::ParserError, TokenKind},
+    lexer::{lex, lexer_ported::init_tokens, parser_error::ParserError, TokenKind},
     parser::{
         end_rule_id, end_rule_kind, num_non_terminal_symbol, num_terminal_symbol,
         rule_name_to_component_id, token_kind_to_component_id, Action, ACTION_TABLE, GOTO_TABLE,

@@ -101,78 +101,6 @@ impl Parser {
     }
 }
 
-/// The logic for converting tokens in PostgreSQL's parser.c
-/// ref: https://github.com/postgres/postgres/blob/REL_16_STABLE/src/backend/parser/parser.c#L195
-fn init_tokens(tokens: &mut [Token]) {
-    fn next_token_index(tokens: &[Token], i: usize) -> Option<usize> {
-        for (j, token) in tokens.iter().enumerate().skip(i + 1) {
-            match token.kind {
-                TokenKind::C_COMMENT | TokenKind::SQL_COMMENT => continue,
-                _ => return Some(j),
-            }
-        }
-        None
-    }
-
-    for i in 0..tokens.len() - 1 {
-        match &tokens[i].kind {
-            TokenKind::KEYWORD(k) if k == "FORMAT" => {
-                if let Some(j) = next_token_index(tokens, i) {
-                    if tokens[j].kind == TokenKind::KEYWORD("JSON".to_string()) {
-                        tokens[i].kind = TokenKind::KEYWORD("FORMAT_LA".to_string());
-                    }
-                }
-            }
-            TokenKind::KEYWORD(k) if k == "NOT" => {
-                if let Some(j) = next_token_index(tokens, i) {
-                    match &tokens[j].kind {
-                        TokenKind::KEYWORD(k)
-                            if matches!(
-                                k.as_str(),
-                                "BETWEEN" | "IN_P" | "LIKE" | "ILIKE" | "SIMILAR"
-                            ) =>
-                        {
-                            tokens[i].kind = TokenKind::KEYWORD("NOT_LA".to_string());
-                        }
-                        _ => {}
-                    }
-                }
-            }
-            TokenKind::KEYWORD(k) if k == "NULLS_P" => {
-                if let Some(j) = next_token_index(tokens, i) {
-                    match &tokens[j].kind {
-                        TokenKind::KEYWORD(k) if matches!(k.as_str(), "FIRST_P" | "LAST_P") => {
-                            tokens[i].kind = TokenKind::KEYWORD("NULLS_LA".to_string());
-                        }
-                        _ => {}
-                    }
-                }
-            }
-            TokenKind::KEYWORD(k) if k == "WITH" => {
-                if let Some(j) = next_token_index(tokens, i) {
-                    match &tokens[j].kind {
-                        TokenKind::KEYWORD(k) if matches!(k.as_str(), "TIME" | "ORDINALITY") => {
-                            tokens[i].kind = TokenKind::KEYWORD("WITH_LA".to_string());
-                        }
-                        _ => {}
-                    }
-                }
-            }
-            TokenKind::KEYWORD(k) if k == "WITHOUT" => {
-                if let Some(j) = next_token_index(tokens, i) {
-                    match &tokens[j].kind {
-                        TokenKind::KEYWORD(k) if matches!(k.as_str(), "TIME") => {
-                            tokens[i].kind = TokenKind::KEYWORD("WITHOUT_LA".to_string());
-                        }
-                        _ => {}
-                    }
-                }
-            }
-            _ => (),
-        }
-    }
-}
-
 /// Parsing a string as PostgreSQL syntax and converting it into a ResolvedNode
 pub fn parse(input: &str) -> Result<ResolvedNode, ParserError> {
     let mut tokens = lex(input)?;
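
The hunk above ends at the lexing call, so the call site for the relocated pass is not visible here; presumably parse now invokes lexer_ported::init_tokens on the token stream before driving the parser, since cst.rs now imports it instead of defining it locally. Below is a sketch of that wiring, with stub types standing in for the crate's real Token, ResolvedNode, and ParserError (all placeholders, not the actual definitions), so it compiles on its own.

// Stub types standing in for the crate's real ones (placeholders only).
struct Token;
struct ResolvedNode;
#[derive(Debug)]
struct ParserError;

// Stub lexer; the real `lex` lives in the lexer module.
fn lex(_input: &str) -> Result<Vec<Token>, ParserError> {
    Ok(Vec::new())
}

// Stub for the relocated pass; the real one is
// lexer::lexer_ported::init_tokens, shown in the diff above.
fn init_tokens(_tokens: &mut [Token]) {}

// Expected shape of `parse` after this commit: the token conversion is
// imported from lexer_ported rather than being a private helper here.
fn parse(input: &str) -> Result<ResolvedNode, ParserError> {
    let mut tokens = lex(input)?;
    init_tokens(&mut tokens);
    // ... the real function goes on to drive the LR parser over the tokens ...
    Ok(ResolvedNode)
}

fn main() {
    let _ = parse("SELECT 1;");
}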

crates/postgresql-cst-parser/src/lexer.rs

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 #[macro_use]
 mod generated;
-mod lexer_ported;
+pub mod lexer_ported;
 pub mod parser_error;
 mod util;

crates/postgresql-cst-parser/src/lexer/lexer_ported.rs

Lines changed: 73 additions & 1 deletion
@@ -1,5 +1,5 @@
 /// Ported sources from PostgreSQL
-use super::{parser_error::ParserError, Lexer, TokenKind, Yylval};
+use super::{parser_error::ParserError, Lexer, Token, TokenKind, Yylval};
 
 pub fn is_highbit_set(c: char) -> u8 {
     (c as u8) & 0x80

@@ -145,3 +145,75 @@ impl Lexer {
         self.yytext()[..yyleng].to_ascii_lowercase()
     }
 }
+
+/// The logic for converting tokens in PostgreSQL's parser.c
+/// ref: https://github.com/postgres/postgres/blob/REL_16_STABLE/src/backend/parser/parser.c#L195
+pub fn init_tokens(tokens: &mut [Token]) {
+    fn next_token_index(tokens: &[Token], i: usize) -> Option<usize> {
+        for (j, token) in tokens.iter().enumerate().skip(i + 1) {
+            match token.kind {
+                TokenKind::C_COMMENT | TokenKind::SQL_COMMENT => continue,
+                _ => return Some(j),
+            }
+        }
+        None
+    }
+
+    for i in 0..tokens.len() - 1 {
+        match &tokens[i].kind {
+            TokenKind::KEYWORD(k) if k == "FORMAT" => {
+                if let Some(j) = next_token_index(tokens, i) {
+                    if tokens[j].kind == TokenKind::KEYWORD("JSON".to_string()) {
+                        tokens[i].kind = TokenKind::KEYWORD("FORMAT_LA".to_string());
+                    }
+                }
+            }
+            TokenKind::KEYWORD(k) if k == "NOT" => {
+                if let Some(j) = next_token_index(tokens, i) {
+                    match &tokens[j].kind {
+                        TokenKind::KEYWORD(k)
+                            if matches!(
+                                k.as_str(),
+                                "BETWEEN" | "IN_P" | "LIKE" | "ILIKE" | "SIMILAR"
+                            ) =>
+                        {
+                            tokens[i].kind = TokenKind::KEYWORD("NOT_LA".to_string());
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            TokenKind::KEYWORD(k) if k == "NULLS_P" => {
+                if let Some(j) = next_token_index(tokens, i) {
+                    match &tokens[j].kind {
+                        TokenKind::KEYWORD(k) if matches!(k.as_str(), "FIRST_P" | "LAST_P") => {
+                            tokens[i].kind = TokenKind::KEYWORD("NULLS_LA".to_string());
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            TokenKind::KEYWORD(k) if k == "WITH" => {
+                if let Some(j) = next_token_index(tokens, i) {
+                    match &tokens[j].kind {
+                        TokenKind::KEYWORD(k) if matches!(k.as_str(), "TIME" | "ORDINALITY") => {
+                            tokens[i].kind = TokenKind::KEYWORD("WITH_LA".to_string());
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            TokenKind::KEYWORD(k) if k == "WITHOUT" => {
+                if let Some(j) = next_token_index(tokens, i) {
+                    match &tokens[j].kind {
+                        TokenKind::KEYWORD(k) if matches!(k.as_str(), "TIME") => {
+                            tokens[i].kind = TokenKind::KEYWORD("WITHOUT_LA".to_string());
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            _ => (),
+        }
+    }
+}
