diff --git a/src/ast/dml.rs b/src/ast/dml.rs index d740b140e..e34231dd2 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -90,10 +90,80 @@ pub struct Insert { /// /// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data) pub format_clause: Option, + /// For multi-table insert: `INSERT FIRST` vs `INSERT ALL` + /// + /// When `true`, this is an `INSERT FIRST` statement (only the first matching WHEN clause is executed). + /// When `false` with non-empty `clauses`, this is an `INSERT ALL` statement. + /// + /// See: + pub insert_first: bool, + /// For multi-table insert: additional INTO clauses (unconditional) + /// + /// Used for `INSERT ALL INTO t1 INTO t2 ... SELECT ...` + /// + /// See: + pub multi_table_into_clauses: Vec, + /// For conditional multi-table insert: WHEN clauses + /// + /// Used for `INSERT ALL/FIRST WHEN cond THEN INTO t1 ... SELECT ...` + /// + /// See: + pub multi_table_when_clauses: Vec, + /// For conditional multi-table insert: ELSE clause + /// + /// See: + pub multi_table_else_clause: Option>, } impl Display for Insert { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Check if this is a Snowflake multi-table insert + let is_multi_table = !self.multi_table_into_clauses.is_empty() + || !self.multi_table_when_clauses.is_empty(); + + if is_multi_table { + // Snowflake multi-table INSERT format + write!(f, "INSERT")?; + if self.overwrite { + write!(f, " OVERWRITE")?; + } + if self.insert_first { + write!(f, " FIRST")?; + } else { + write!(f, " ALL")?; + } + + // Unconditional multi-table insert: INTO clauses directly after ALL + for into_clause in &self.multi_table_into_clauses { + SpaceOrNewline.fmt(f)?; + write!(f, "{}", into_clause)?; + } + + // Conditional multi-table insert: WHEN clauses + for when_clause in &self.multi_table_when_clauses { + SpaceOrNewline.fmt(f)?; + write!(f, "{}", when_clause)?; + } + + // ELSE clause + if let Some(else_clauses) = &self.multi_table_else_clause { + SpaceOrNewline.fmt(f)?; + write!(f, "ELSE")?; + for into_clause in else_clauses { + SpaceOrNewline.fmt(f)?; + write!(f, "{}", into_clause)?; + } + } + + // Source query + if let Some(source) = &self.source { + SpaceOrNewline.fmt(f)?; + source.fmt(f)?; + } + return Ok(()); + } + + // Standard INSERT format let table_name = if let Some(alias) = &self.table_alias { format!("{0} AS {alias}", self.table) } else { @@ -644,3 +714,92 @@ impl fmt::Display for OutputClause { } } } + +/// A WHEN clause in a conditional multi-table INSERT. +/// +/// Syntax: +/// ```sql +/// WHEN n1 > 100 THEN +/// INTO t1 +/// INTO t2 (c1, c2) VALUES (n1, n2) +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MultiTableInsertWhenClause { + /// The condition for this WHEN clause + pub condition: Expr, + /// The INTO clauses to execute when the condition is true + pub into_clauses: Vec, +} + +impl Display for MultiTableInsertWhenClause { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "WHEN {} THEN", self.condition)?; + for into_clause in &self.into_clauses { + SpaceOrNewline.fmt(f)?; + write!(f, "{}", into_clause)?; + } + Ok(()) + } +} + +/// An INTO clause in a multi-table INSERT. +/// +/// Syntax: +/// ```sql +/// INTO [ ( [ , ... ] ) ] [ VALUES ( { | DEFAULT | NULL } [ , ... ] ) ] +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MultiTableInsertIntoClause { + /// The target table + pub table_name: ObjectName, + /// The target columns (optional) + pub columns: Vec, + /// The VALUES clause (optional) + pub values: Option, +} + +impl Display for MultiTableInsertIntoClause { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "INTO {}", self.table_name)?; + if !self.columns.is_empty() { + write!(f, " ({})", display_comma_separated(&self.columns))?; + } + if let Some(values) = &self.values { + write!(f, " VALUES ({})", display_comma_separated(&values.values))?; + } + Ok(()) + } +} + +/// The VALUES clause in a multi-table INSERT INTO clause. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MultiTableInsertValues { + /// The values to insert (can be column references, DEFAULT, or NULL) + pub values: Vec, +} + +/// A value in a multi-table INSERT VALUES clause. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MultiTableInsertValue { + /// A column reference or expression from the source + Expr(Expr), + /// The DEFAULT keyword + Default, +} + +impl Display for MultiTableInsertValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MultiTableInsertValue::Expr(expr) => write!(f, "{}", expr), + MultiTableInsertValue::Default => write!(f, "DEFAULT"), + } + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c8d9c6be3..3d2ec4823 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -81,7 +81,8 @@ pub use self::ddl::{ }; pub use self::dml::{ Delete, Insert, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, - MergeInsertKind, MergeUpdateExpr, OutputClause, Update, + MergeInsertKind, MergeUpdateExpr, MultiTableInsertIntoClause, MultiTableInsertValue, + MultiTableInsertValues, MultiTableInsertWhenClause, OutputClause, Update, }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ diff --git a/src/ast/query.rs b/src/ast/query.rs index 16fc9ec0e..6e8907f20 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1242,6 +1242,8 @@ pub enum TableFactor { lateral: bool, subquery: Box, alias: Option, + /// Optional table sample modifier + sample: Option, }, /// `TABLE()[ AS ]` TableFunction { @@ -1922,6 +1924,7 @@ impl fmt::Display for TableFactor { lateral, subquery, alias, + sample, } => { if *lateral { write!(f, "LATERAL ")?; @@ -1934,6 +1937,9 @@ impl fmt::Display for TableFactor { if let Some(alias) = alias { write!(f, " {alias}")?; } + if let Some(TableSampleKind::AfterTableAlias(sample)) = sample { + write!(f, " {sample}")?; + } Ok(()) } TableFactor::Function { diff --git a/src/ast/spans.rs b/src/ast/spans.rs index f88b30296..3177baf11 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1306,6 +1306,10 @@ impl Spanned for Insert { assignments, settings: _, // todo, clickhouse specific format_clause: _, // todo, clickhouse specific + insert_first: _, // snowflake multi-table insert + multi_table_into_clauses: _, // snowflake multi-table insert + multi_table_when_clauses: _, // snowflake multi-table insert + multi_table_else_clause: _, // snowflake multi-table insert } = self; union_spans( @@ -1913,6 +1917,7 @@ impl Spanned for TableFactor { lateral: _, subquery, alias, + sample: _, } => subquery .span() .union_opt(&alias.as_ref().map(|alias| alias.span())), diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index ed01c128b..584e0636e 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -30,10 +30,12 @@ use crate::ast::{ AlterTable, AlterTableOperation, AlterTableType, CatalogSyncNamespaceMode, ColumnOption, ColumnPolicy, ColumnPolicyProperty, ContactEntry, CopyIntoSnowflakeKind, CreateTableLikeKind, DollarQuotedString, Ident, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, - IdentityPropertyKind, IdentityPropertyOrder, InitializeKind, ObjectName, ObjectNamePart, - RefreshModeKind, RowAccessPolicy, ShowObjects, SqlOption, Statement, - StorageSerializationPolicy, TagsColumnOption, Value, WrappedCollection, + IdentityPropertyKind, IdentityPropertyOrder, InitializeKind, Insert, MultiTableInsertIntoClause, + MultiTableInsertValue, MultiTableInsertValues, MultiTableInsertWhenClause, ObjectName, + ObjectNamePart, RefreshModeKind, RowAccessPolicy, ShowObjects, SqlOption, Statement, + StorageSerializationPolicy, TableObject, TagsColumnOption, Value, WrappedCollection, }; +use crate::tokenizer::TokenWithSpan; use crate::dialect::{Dialect, Precedence}; use crate::keywords::Keyword; use crate::parser::{IsOptional, Parser, ParserError}; @@ -318,6 +320,30 @@ impl Dialect for SnowflakeDialect { parser.prev_token(); } + // Check for multi-table INSERT (Snowflake specific) + // INSERT [OVERWRITE] ALL ... or INSERT [OVERWRITE] FIRST ... + if parser.parse_keyword(Keyword::INSERT) { + let insert_token = parser.get_current_token().clone(); + let overwrite = parser.parse_keyword(Keyword::OVERWRITE); + + // Check for ALL or FIRST keyword + if let Some(kw) = parser.parse_one_of_keywords(&[Keyword::ALL, Keyword::FIRST]) { + let insert_first = kw == Keyword::FIRST; + return Some(parse_multi_table_insert( + parser, + insert_token, + overwrite, + insert_first, + )); + } + + // Not a multi-table insert, rewind + if overwrite { + parser.prev_token(); // rewind OVERWRITE + } + parser.prev_token(); // rewind INSERT + } + None } @@ -1628,3 +1654,173 @@ fn parse_show_objects(terse: bool, parser: &mut Parser) -> Result +/// +/// -- Conditional multi-table insert +/// INSERT [ OVERWRITE ] { FIRST | ALL } +/// { WHEN THEN intoClause [ ... ] } +/// [ ... ] +/// [ ELSE intoClause ] +/// +/// ``` +/// +/// See: +fn parse_multi_table_insert( + parser: &mut Parser, + insert_token: TokenWithSpan, + overwrite: bool, + insert_first: bool, +) -> Result { + // Check if this is conditional (has WHEN clauses) or unconditional (direct INTO clauses) + let is_conditional = parser.peek_keyword(Keyword::WHEN); + + let (multi_table_into_clauses, multi_table_when_clauses, multi_table_else_clause) = + if is_conditional { + // Conditional multi-table insert: WHEN clauses + let (when_clauses, else_clause) = parse_multi_table_insert_when_clauses(parser)?; + (vec![], when_clauses, else_clause) + } else { + // Unconditional multi-table insert: direct INTO clauses + let into_clauses = parse_multi_table_insert_into_clauses(parser)?; + (into_clauses, vec![], None) + }; + + // Parse the source query + let source = parser.parse_query()?; + + Ok(Statement::Insert(Insert { + insert_token: insert_token.into(), + or: None, + ignore: false, + into: false, + table: TableObject::TableName(ObjectName(vec![])), // Not used for multi-table insert + table_alias: None, + columns: vec![], + overwrite, + source: Some(source), + assignments: vec![], + partitioned: None, + after_columns: vec![], + has_table_keyword: false, + on: None, + returning: None, + replace_into: false, + priority: None, + insert_alias: None, + settings: None, + format_clause: None, + insert_first, + multi_table_into_clauses, + multi_table_when_clauses, + multi_table_else_clause, + })) +} + +/// Parse one or more INTO clauses for multi-table INSERT. +fn parse_multi_table_insert_into_clauses( + parser: &mut Parser, +) -> Result, ParserError> { + let mut into_clauses = vec![]; + while parser.parse_keyword(Keyword::INTO) { + into_clauses.push(parse_multi_table_insert_into_clause(parser)?); + } + if into_clauses.is_empty() { + return parser.expected("INTO clause in multi-table INSERT", parser.peek_token()); + } + Ok(into_clauses) +} + +/// Parse a single INTO clause for multi-table INSERT. +/// +/// Syntax: `INTO [ ( ) ] [ VALUES ( ) ]` +fn parse_multi_table_insert_into_clause( + parser: &mut Parser, +) -> Result { + let table_name = parser.parse_object_name(false)?; + + // Parse optional column list + let columns = if parser.peek_token() == Token::LParen && !parser.peek_keyword(Keyword::VALUES) { + // Check if this is a column list (not VALUES) + let peeked = parser.peek_tokens::<2>(); + if peeked[0] == Token::LParen { + // Could be columns - try to parse + parser + .maybe_parse(|p| p.parse_parenthesized_column_list(IsOptional::Optional, false))? + .unwrap_or_default() + } else { + vec![] + } + } else { + vec![] + }; + + // Parse optional VALUES clause + let values = if parser.parse_keyword(Keyword::VALUES) { + parser.expect_token(&Token::LParen)?; + let values = parser.parse_comma_separated(parse_multi_table_insert_value)?; + parser.expect_token(&Token::RParen)?; + Some(MultiTableInsertValues { values }) + } else { + None + }; + + Ok(MultiTableInsertIntoClause { + table_name, + columns, + values, + }) +} + +/// Parse a single value in a multi-table INSERT VALUES clause. +fn parse_multi_table_insert_value(parser: &mut Parser) -> Result { + if parser.parse_keyword(Keyword::DEFAULT) { + Ok(MultiTableInsertValue::Default) + } else { + Ok(MultiTableInsertValue::Expr(parser.parse_expr()?)) + } +} + +/// Parse WHEN clauses for conditional multi-table INSERT. +fn parse_multi_table_insert_when_clauses( + parser: &mut Parser, +) -> Result<(Vec, Option>), ParserError> +{ + let mut when_clauses = vec![]; + let mut else_clause = None; + + // Parse WHEN clauses + while parser.parse_keyword(Keyword::WHEN) { + let condition = parser.parse_expr()?; + parser.expect_keyword(Keyword::THEN)?; + + // Parse INTO clauses for this WHEN + let into_clauses = parse_multi_table_insert_into_clauses(parser)?; + + when_clauses.push(MultiTableInsertWhenClause { + condition, + into_clauses, + }); + } + + // Parse optional ELSE clause + if parser.parse_keyword(Keyword::ELSE) { + else_clause = Some(parse_multi_table_insert_into_clauses(parser)?); + } + + if when_clauses.is_empty() { + return parser.expected( + "at least one WHEN clause in conditional multi-table INSERT", + parser.peek_token(), + ); + } + + Ok((when_clauses, else_clause)) +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 373076f12..98bffa427 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -14793,6 +14793,7 @@ impl<'a> Parser<'a> { pipe_operators: vec![], }), alias, + sample: None, }) } else if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) && self.parse_keyword(Keyword::UNNEST) @@ -15593,6 +15594,14 @@ impl<'a> Parser<'a> { let subquery = self.parse_query()?; self.expect_token(&Token::RParen)?; let alias = self.maybe_parse_table_alias()?; + + // Parse optional SAMPLE clause after alias + let sample = if let Some(parsed_sample) = self.maybe_parse_table_sample()? { + Some(TableSampleKind::AfterTableAlias(parsed_sample)) + } else { + None + }; + Ok(TableFactor::Derived { lateral: match lateral { Lateral => true, @@ -15600,6 +15609,7 @@ impl<'a> Parser<'a> { }, subquery, alias, + sample, }) } @@ -16617,6 +16627,10 @@ impl<'a> Parser<'a> { insert_alias, settings, format_clause, + insert_first: false, + multi_table_into_clauses: vec![], + multi_table_when_clauses: vec![], + multi_table_else_clause: None, })) } } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9f549e4d0..81d8e6349 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -512,7 +512,8 @@ fn parse_update_set_from() { format_clause: None, pipe_operators: vec![], }), - alias: table_alias(true, "t2") + alias: table_alias(true, "t2"), + sample: None, }, joins: vec![] }])), @@ -7792,6 +7793,7 @@ fn parse_derived_tables() { lateral: false, subquery: Box::new(verified_query("(SELECT 1) UNION (SELECT 2)")), alias: table_alias(true, "t1"), + sample: None, }, joins: vec![Join { relation: table_from_name(ObjectName::from(vec!["t2".into()])), @@ -8800,6 +8802,7 @@ fn lateral_derived() { lateral, ref subquery, alias: Some(ref alias), + sample: _, } = join.relation { assert_eq!(lateral_in, lateral); @@ -9878,6 +9881,7 @@ fn parse_merge() { pipe_operators: vec![], }), alias: table_alias(true, "stg"), + sample: None, } ); assert_eq!(source, source_no_into); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 24707604a..f103468b9 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -5370,6 +5370,10 @@ fn test_simple_postgres_insert_with_alias() { insert_alias: None, settings: None, format_clause: None, + insert_first: false, + multi_table_into_clauses: vec![], + multi_table_when_clauses: vec![], + multi_table_else_clause: None, }) ) } @@ -5444,6 +5448,10 @@ fn test_simple_postgres_insert_with_alias() { insert_alias: None, settings: None, format_clause: None, + insert_first: false, + multi_table_into_clauses: vec![], + multi_table_when_clauses: vec![], + multi_table_else_clause: None, }) ) } @@ -5516,6 +5524,10 @@ fn test_simple_insert_with_quoted_alias() { insert_alias: None, settings: None, format_clause: None, + insert_first: false, + multi_table_into_clauses: vec![], + multi_table_when_clauses: vec![], + multi_table_else_clause: None, }) ) } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 37e9f8cb4..990b35bc2 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -3515,6 +3515,23 @@ fn test_table_sample() { snowflake_and_generic().verified_stmt("SELECT id FROM mytable TABLESAMPLE (10) SEED (1)"); } +#[test] +fn test_subquery_sample() { + // Test SAMPLE clause on subqueries (derived tables) + snowflake_and_generic().verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10000 ROWS)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM (SELECT * FROM mytable) AS t SAMPLE (50 PERCENT)"); + // Nested subquery with SAMPLE + snowflake_and_generic().verified_stmt( + "SELECT * FROM (SELECT * FROM (SELECT report_from FROM mytable) SAMPLE (10000 ROWS)) AS anon_1", + ); + // SAMPLE with SEED on subquery + snowflake_and_generic() + .verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10) SEED (42)"); +} + #[test] fn parse_ls_and_rm() { snowflake().one_statement_parses_to("LS @~", "LIST @~");