Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,13 @@ pub trait Dialect: Debug + Any {
Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => {
Ok(p!(DoubleColon))
}
Token::Colon => match parser.peek_nth_token(1).token {
// When colon is followed by a string or a number, it's usually in MAP syntax.
Token::SingleQuotedString(_) | Token::Number(_, _) => Ok(self.prec_unknown()),
// In other cases, it's used in semi-structured data traversal like in variant or JSON
// string columns. See `JsonAccess`.
_ => Ok(p!(Colon)),
},
Token::Arrow
| Token::LongArrow
| Token::HashArrow
Expand Down Expand Up @@ -812,6 +819,7 @@ pub trait Dialect: Debug + Any {
Precedence::Ampersand => 23,
Precedence::Caret => 22,
Precedence::Pipe => 21,
Precedence::Colon => 21,
Precedence::Between => 20,
Precedence::Eq => 20,
Precedence::Like => 19,
Expand Down Expand Up @@ -1232,6 +1240,7 @@ pub enum Precedence {
Ampersand,
Caret,
Pipe,
Colon,
Between,
Eq,
Like,
Expand Down
9 changes: 9 additions & 0 deletions src/dialect/mssql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,15 @@ impl Dialect for MsSqlDialect {
None
}
}

/// Dialect-specific precedence lookup for the token the parser will see next.
///
/// A `:` yields `prec_unknown()`, the lowest precedence, so the expression parser
/// does not fold it into a binary operation. Every other token yields `None`.
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
    if matches!(parser.peek_token().token, Token::Colon) {
        // lowest prec to prevent it from turning into a binary op
        Some(Ok(self.prec_unknown()))
    } else {
        None
    }
}
}

impl MsSqlDialect {
Expand Down
3 changes: 3 additions & 0 deletions src/dialect/postgresql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ impl Dialect for PostgreSqlDialect {
| Token::ShiftRight
| Token::ShiftLeft
| Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)),
// lowest prec to prevent it from turning into a binary op
Token::Colon => Some(Ok(self.prec_unknown())),
_ => None,
}
}
Expand All @@ -159,6 +161,7 @@ impl Dialect for PostgreSqlDialect {
Precedence::Ampersand => PG_OTHER_PREC,
Precedence::Caret => CARET_PREC,
Precedence::Pipe => PG_OTHER_PREC,
Precedence::Colon => PG_OTHER_PREC,
Precedence::Between => BETWEEN_LIKE_PREC,
Precedence::Eq => EQ_PREC,
Precedence::Like => BETWEEN_LIKE_PREC,
Expand Down
9 changes: 6 additions & 3 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3847,7 +3847,8 @@ impl<'a> Parser<'a> {
let lower_bound = if self.consume_token(&Token::Colon) {
None
} else {
Some(self.parse_expr()?)
// parse expr until we hit a colon (or any token with lower precedence)
Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes to the subscript behavior don't seem to have any new tests accompanying them. Could we add some?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes actually came from a failing test, but I will add some more tests to explicitly look for this behavior.

};

// check for end
Expand Down Expand Up @@ -3875,7 +3876,8 @@ impl<'a> Parser<'a> {
stride: None,
});
} else {
Some(self.parse_expr()?)
// parse expr until we hit a colon (or any token with lower precedence)
Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
};

// check for end
Expand All @@ -3892,7 +3894,8 @@ impl<'a> Parser<'a> {
let stride = if self.consume_token(&Token::RBracket) {
None
} else {
Some(self.parse_expr()?)
// parse expr until we hit a colon (or any token with lower precedence)
Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
};

if stride.is_some() {
Expand Down
123 changes: 123 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17972,3 +17972,126 @@ fn parse_select_parenthesized_wildcard() {
assert_eq!(select2.projection.len(), 1);
assert!(matches!(select2.projection[0], SelectItem::Wildcard(_)));
}

// https://docs.snowflake.com/en/user-guide/querying-semistructured
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// https://docs.snowflake.com/en/user-guide/querying-semistructured

#[test]
fn parse_semi_structured_data_traversal() {
let dialects = TestedDialects::new(vec![
Box::new(GenericDialect {}),
Box::new(SnowflakeDialect {}),
]);
Comment on lines +17979 to +17982
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be all dialects instead? We don't seem to have special handling for Generic and Snowflake.

Copy link
Author

@Samyak2 Samyak2 Jan 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We actually do have special handling for SnowflakeDialect and GenericDialect:

|| (dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == *tok)

(this is in main currently, not a change in this PR)

From what I know, only Snowflake and Databricks support this syntax. So I can expand the test to include Databricks as well. But I don't think it would make sense to run this test on dialects that don't support this syntax. What do you think?


// most basic case
let sql = "SELECT a:b FROM t";
let select = dialects.verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "b".to_owned(),
quoted: false
}]
},
}),
select.projection[0]
);

// identifier can be quoted
let sql = r#"SELECT a:"my long object key name" FROM t"#;
let select = dialects.verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "my long object key name".to_owned(),
quoted: true
}]
},
}),
select.projection[0]
);

dialects.verified_stmt("SELECT a:b::INT FROM t");

// unquoted keywords are permitted in the object key
let sql = "SELECT a:select, a:from FROM t";
let select = dialects.verified_only_select(sql);
assert_eq!(
vec![
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "select".to_owned(),
quoted: false
}]
},
}),
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "from".to_owned(),
quoted: false
}]
},
})
],
select.projection
);

// multiple levels can be traversed
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo."bar".baz"#;
let select = dialects.verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: true,
},
JsonPathElem::Dot {
key: "baz".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);

// dot and bracket notation can be mixed (starting with : case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo[0].bar"#;
let select = dialects.verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Bracket {
key: Expr::value(number("0")),
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);
}
115 changes: 2 additions & 113 deletions tests/sqlparser_snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1265,37 +1265,8 @@ fn parse_lateral_flatten() {
// https://docs.snowflake.com/en/user-guide/querying-semistructured
#[test]
fn parse_semi_structured_data_traversal() {
// most basic case
let sql = "SELECT a:b FROM t";
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "b".to_owned(),
quoted: false
}]
},
}),
select.projection[0]
);

// identifier can be quoted
let sql = r#"SELECT a:"my long object key name" FROM t"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "my long object key name".to_owned(),
quoted: true
}]
},
}),
select.projection[0]
);
// see `tests/sqlparser_common.rs` -> `parse_semi_structured_data_traversal` for more test
// cases. This test only has Snowflake-specific syntax like array access.

// expressions are allowed in bracket notation
let sql = r#"SELECT a[2 + 2] FROM t"#;
Expand All @@ -1316,88 +1287,6 @@ fn parse_semi_structured_data_traversal() {
select.projection[0]
);

snowflake().verified_stmt("SELECT a:b::INT FROM t");

// unquoted keywords are permitted in the object key
let sql = "SELECT a:select, a:from FROM t";
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "select".to_owned(),
quoted: false
}]
},
}),
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "from".to_owned(),
quoted: false
}]
},
})
],
select.projection
);

// multiple levels can be traversed
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo."bar".baz"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: true,
},
JsonPathElem::Dot {
key: "baz".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);

// dot and bracket notation can be mixed (starting with : case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo[0].bar"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Bracket {
key: Expr::value(number("0")),
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);

// dot and bracket notation can be mixed (starting with bracket case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a[0].foo.bar"#;
Expand Down