From 2760d56469cf360584e80fd64ee89d31fa7e6faf Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Wed, 9 Jul 2025 16:23:34 +0200 Subject: [PATCH 1/3] Improve support for reserved keywords as table aliases in Snowflake --- src/dialect/mod.rs | 10 ++++- src/dialect/snowflake.rs | 79 ++++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 26 ++++++++---- tests/sqlparser_snowflake.rs | 51 +++++++++++++++++++++++ 4 files changed, 157 insertions(+), 9 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 8f9dd617f..861dfe265 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -992,11 +992,17 @@ pub trait Dialect: Debug + Any { explicit || self.is_column_alias(kw, parser) } + /// Returns true if the specified keyword should be parsed as a table identifier. + /// See [keywords::RESERVED_FOR_TABLE_ALIAS] + fn is_table_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool { + !keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw) + } + /// Returns true if the specified keyword should be parsed as a table factor alias. /// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided /// to enable looking ahead if needed. - fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool { - explicit || !keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw) + fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + explicit || self.is_table_alias(kw, parser) } /// Returns true if this dialect supports querying historical table data diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 21bc95930..24db7617b 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -345,6 +345,85 @@ impl Dialect for SnowflakeDialect { } } + fn is_table_alias(&self, kw: &Keyword, parser: &mut Parser) -> bool { + match kw { + // The following keywords can be considered an alias as long as + // they are not followed by other tokens that may change their meaning + Keyword::LIMIT + | Keyword::RETURNING + | Keyword::INNER + | Keyword::USING + | Keyword::PIVOT + | Keyword::UNPIVOT + | Keyword::EXCEPT + | Keyword::MATCH_RECOGNIZE + | Keyword::OFFSET + if !matches!(parser.peek_token_ref().token, Token::SemiColon | Token::EOF) => + { + false + } + + // `FETCH` can be considered an alias as long as it's not followed by `FIRST`` or `NEXT` + // which would give it a different meanings, for example: + // `SELECT * FROM tbl FETCH FIRST 10 ROWS` - not an alias + // `SELECT * FROM tbl FETCH 10` - not an alias + Keyword::FETCH + if parser.peek_keyword(Keyword::FIRST) + || parser.peek_keyword(Keyword::NEXT) + || matches!(parser.peek_token().token, Token::Number(_, _)) => + { + false + } + + // All sorts of join-related keywords can be considered aliases unless additional + // keywords change their meaning. + Keyword::RIGHT | Keyword::LEFT | Keyword::SEMI | Keyword::ANTI + if parser + .peek_one_of_keywords(&[Keyword::JOIN, Keyword::OUTER]) + .is_some() => + { + false + } + Keyword::GLOBAL if parser.peek_keyword(Keyword::FULL) => false, + + // Reserved keywords by the Snowflake dialect, which seem to be less strictive + // than what is listed in `keywords::RESERVED_FOR_TABLE_ALIAS`. The following + // keywords were tested with the this statement: `SELECT .* FROM tbl `. + Keyword::WITH + | Keyword::ORDER + | Keyword::SELECT + | Keyword::WHERE + | Keyword::GROUP + | Keyword::HAVING + | Keyword::LATERAL + | Keyword::UNION + | Keyword::INTERSECT + | Keyword::MINUS + | Keyword::ON + | Keyword::JOIN + | Keyword::INNER + | Keyword::CROSS + | Keyword::FULL + | Keyword::LEFT + | Keyword::RIGHT + | Keyword::NATURAL + | Keyword::USING + | Keyword::ASOF + | Keyword::MATCH_CONDITION + | Keyword::SET + | Keyword::QUALIFY + | Keyword::FOR + | Keyword::START + | Keyword::CONNECT + | Keyword::SAMPLE + | Keyword::TABLESAMPLE + | Keyword::FROM => false, + + // Any other word is considered an alias + _ => true, + } + } + /// See: fn supports_timestamp_versioning(&self) -> bool { true diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a00405aaf..906be9bef 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5548,7 +5548,8 @@ fn parse_named_window_functions() { WINDOW w AS (PARTITION BY x), win AS (ORDER BY y)"; supported_dialects.verified_stmt(sql); - let select = verified_only_select(sql); + let select = all_dialects_except(|d| d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d))) + .verified_only_select(sql); const EXPECTED_PROJ_QTY: usize = 2; assert_eq!(EXPECTED_PROJ_QTY, select.projection.len()); @@ -5578,6 +5579,7 @@ fn parse_named_window_functions() { #[test] fn parse_window_clause() { + let dialects = all_dialects_except(|d| d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d))); let sql = "SELECT * \ FROM mytable \ WINDOW \ @@ -5590,10 +5592,14 @@ fn parse_window_clause() { window7 AS (window1 ROWS UNBOUNDED PRECEDING), \ window8 AS (window1 PARTITION BY a ORDER BY b ROWS UNBOUNDED PRECEDING) \ ORDER BY C3"; - verified_only_select(sql); + dialects.verified_only_select(sql); let sql = "SELECT * from mytable WINDOW window1 AS window2"; - let dialects = all_dialects_except(|d| d.is::() || d.is::()); + let dialects = all_dialects_except(|d| { + d.is::() + || d.is::() + || d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d)) + }); let res = dialects.parse_sql_statements(sql); assert_eq!( ParserError::ParserError("Expected: (, found: window2".to_string()), @@ -5603,6 +5609,7 @@ fn parse_window_clause() { #[test] fn test_parse_named_window() { + let dialects = all_dialects_except(|d| d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d))); let sql = "SELECT \ MIN(c12) OVER window1 AS min1, \ MAX(c12) OVER window2 AS max1 \ @@ -5610,7 +5617,7 @@ fn test_parse_named_window() { WINDOW window1 AS (ORDER BY C12), \ window2 AS (PARTITION BY C11) \ ORDER BY C3"; - let actual_select_only = verified_only_select(sql); + let actual_select_only = dialects.verified_only_select(sql); let expected = Select { select_token: AttachedToken::empty(), distinct: None, @@ -5759,6 +5766,10 @@ fn test_parse_named_window() { #[test] fn parse_window_and_qualify_clause() { + let dialects = all_dialects_except(|d| { + d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d)) + || d.is_table_alias(&Keyword::QUALIFY, &mut Parser::new(d)) + }); let sql = "SELECT \ MIN(c12) OVER window1 AS min1 \ FROM aggregate_test_100 \ @@ -5766,7 +5777,7 @@ fn parse_window_and_qualify_clause() { WINDOW window1 AS (ORDER BY C12), \ window2 AS (PARTITION BY C11) \ ORDER BY C3"; - verified_only_select(sql); + dialects.verified_only_select(sql); let sql = "SELECT \ MIN(c12) OVER window1 AS min1 \ @@ -5775,7 +5786,7 @@ fn parse_window_and_qualify_clause() { window2 AS (PARTITION BY C11) \ QUALIFY ROW_NUMBER() OVER my_window \ ORDER BY C3"; - verified_only_select(sql); + dialects.verified_only_select(sql); } #[test] @@ -7443,7 +7454,8 @@ fn parse_join_syntax_variants() { "SELECT c1 FROM t1 FULL JOIN t2 USING(c1)", ); - let res = parse_sql_statements("SELECT * FROM a OUTER JOIN b ON 1"); + let dialects = all_dialects_except(|d| d.is_table_alias(&Keyword::OUTER, &mut Parser::new(d))); + let res = dialects.parse_sql_statements("SELECT * FROM a OUTER JOIN b ON 1"); assert_eq!( ParserError::ParserError("Expected: APPLY, found: JOIN".to_string()), res.unwrap_err() diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 389f47034..65546bee0 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -3492,6 +3492,57 @@ fn test_sql_keywords_as_select_item_aliases() { } } +#[test] +fn test_sql_keywords_as_table_aliases() { + // Some keywords that should be parsed as an alias implicitly + let unreserved_kws = vec![ + "VIEW", + "EXPLAIN", + "ANALYZE", + "SORT", + "PIVOT", + "UNPIVOT", + "TOP", + "LIMIT", + "OFFSET", + "FETCH", + "EXCEPT", + "CLUSTER", + "DISTRIBUTE", + "GLOBAL", + "ANTI", + "SEMI", + "RETURNING", + "OUTER", + "WINDOW", + "END", + "PARTITION", + "PREWHERE", + "SETTINGS", + "FORMAT", + "MATCH_RECOGNIZE", + "OPEN", + ]; + + for kw in unreserved_kws { + snowflake().verified_stmt(&format!("SELECT * FROM tbl AS {kw}")); + snowflake().one_statement_parses_to( + &format!("SELECT * FROM tbl {kw}"), + &format!("SELECT * FROM tbl AS {kw}"), + ); + } + + // Some keywords that should not be parsed as an alias implicitly + let reserved_kws = vec![ + "FROM", "GROUP", "HAVING", "ORDER", "SELECT", "UNION", "WHERE", "WITH", + ]; + for kw in reserved_kws { + assert!(snowflake() + .parse_sql_statements(&format!("SELECT * FROM tbl {kw}")) + .is_err()); + } +} + #[test] fn test_timetravel_at_before() { snowflake().verified_only_select("SELECT * FROM tbl AT(TIMESTAMP => '2024-12-15 00:00:00')"); From cdfd75f3235f13a60aa3528110295eeee84c8713 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Thu, 10 Jul 2025 18:08:42 +0200 Subject: [PATCH 2/3] Code review comments --- src/dialect/snowflake.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 24db7617b..036f1f0d0 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -319,8 +319,7 @@ impl Dialect for SnowflakeDialect { // `FETCH` can be considered an alias as long as it's not followed by `FIRST`` or `NEXT` // which would give it a different meanings, for example: `SELECT 1 FETCH FIRST 10 ROWS` - not an alias - Keyword::FETCH - if parser.peek_keyword(Keyword::FIRST) || parser.peek_keyword(Keyword::NEXT) => + Keyword::FETCH if parser.peek_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT]).is_some() => { false } From f81d917ea0047645fefccde5804f609e98d28687 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Thu, 10 Jul 2025 18:14:58 +0200 Subject: [PATCH 3/3] Code review comments --- src/dialect/snowflake.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 036f1f0d0..85c09acb3 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -318,8 +318,11 @@ impl Dialect for SnowflakeDialect { } // `FETCH` can be considered an alias as long as it's not followed by `FIRST`` or `NEXT` - // which would give it a different meanings, for example: `SELECT 1 FETCH FIRST 10 ROWS` - not an alias - Keyword::FETCH if parser.peek_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT]).is_some() => + // which would give it a different meanings, for example: + // `SELECT 1 FETCH FIRST 10 ROWS` - not an alias + // `SELECT 1 FETCH 10` - not an alias + Keyword::FETCH if parser.peek_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT]).is_some() + || matches!(parser.peek_token().token, Token::Number(_, _)) => { false } @@ -367,8 +370,9 @@ impl Dialect for SnowflakeDialect { // `SELECT * FROM tbl FETCH FIRST 10 ROWS` - not an alias // `SELECT * FROM tbl FETCH 10` - not an alias Keyword::FETCH - if parser.peek_keyword(Keyword::FIRST) - || parser.peek_keyword(Keyword::NEXT) + if parser + .peek_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT]) + .is_some() || matches!(parser.peek_token().token, Token::Number(_, _)) => { false