Skip to content

Commit f3bdcb0

Browse files
authored
Fix(duckdb): fix JSON pointer path parsing, reduce warning noise (tobymao#2911)
* Fix(duckdb): fix JSON pointer path parsing, reduce warning noise * Rename
1 parent 3b533c4 commit f3bdcb0

File tree

4 files changed

+44
-20
lines changed

4 files changed

+44
-20
lines changed

sqlglot/dialects/dialect.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import logging
34
import typing as t
45
from enum import Enum, auto
56
from functools import reduce
@@ -8,7 +9,7 @@
89
from sqlglot.errors import ParseError
910
from sqlglot.generator import Generator
1011
from sqlglot.helper import AutoName, flatten, is_int, seq_get
11-
from sqlglot.jsonpath import generate as generate_json_path
12+
from sqlglot.jsonpath import generate as generate_json_path, parse as parse_json_path
1213
from sqlglot.parser import Parser
1314
from sqlglot.time import TIMEZONES, format_time
1415
from sqlglot.tokens import Token, Tokenizer, TokenType
@@ -20,6 +21,8 @@
2021
if t.TYPE_CHECKING:
2122
from sqlglot._typing import B, E
2223

24+
logger = logging.getLogger("sqlglot")
25+
2326

2427
class Dialects(str, Enum):
2528
"""Dialects supported by SQLGLot."""
@@ -441,6 +444,19 @@ def quote_identifier(self, expression: E, identify: bool = True) -> E:
441444

442445
return expression
443446

447+
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Turn a literal JSON path argument into an `exp.JSONPath` node.

    Args:
        path: The expression used as the path operand of a JSON extraction.

    Returns:
        An `exp.JSONPath` built from the parsed path when `path` is an
        `exp.Literal` whose text parses successfully; otherwise `path` itself,
        unchanged.
    """
    # Only literals carry path text that can be parsed up front.
    if not isinstance(path, exp.Literal):
        return path

    path_text = path.name
    if path.is_number:
        # A bare number is wrapped in brackets before being handed to the parser.
        path_text = f"[{path_text}]"

    try:
        return exp.JSONPath(expressions=parse_json_path(path_text))
    except ParseError:
        # Best effort: report the problem and fall back to the raw literal.
        logger.warning(f"Invalid JSON path syntax: {path_text}")

    return path
459+
444460
def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
445461
return self.parser(**opts).parse(self.tokenize(sql), sql)
446462

sqlglot/dialects/duckdb.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,18 @@ class DuckDB(Dialect):
171171
# https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
172172
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
173173

174+
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Convert a JSON path argument, leaving DuckDB-specific path syntax untouched.

    Args:
        path: The expression used as the path operand of a JSON extraction.

    Returns:
        `path` unchanged when it is a literal whose text starts with `/` or
        contains `[#`; otherwise whatever the parent class' `to_json_path`
        returns for it.
    """
    if isinstance(path, exp.Literal):
        # Besides JSON paths, DuckDB understands the JSON pointer syntax, in which
        # every path begins with a `/`. It also lets users index from the back of a
        # list via the `[#-i]` syntax. Neither form should be fed to the JSON path
        # parser: doing so either triggers a noisy warning or yields an invalid
        # representation of the path.
        literal_text = path.name
        is_json_pointer = literal_text.startswith("/")
        if is_json_pointer or "[#" in literal_text:
            return path

    return super().to_json_path(path)
185+
174186
class Tokenizer(tokens.Tokenizer):
175187
KEYWORDS = {
176188
**tokens.Tokenizer.KEYWORDS,

sqlglot/parser.py

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from sqlglot import exp
88
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
99
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
10-
from sqlglot.jsonpath import parse as _parse_json_path
1110
from sqlglot.time import format_time
1211
from sqlglot.tokens import Token, Tokenizer, TokenType
1312
from sqlglot.trie import TrieResult, in_trie, new_trie
@@ -61,22 +60,11 @@ def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
6160
return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
6261

6362

64-
def parse_json_path(path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
65-
if isinstance(path, exp.Literal):
66-
path_text = path.name
67-
if path.is_number:
68-
path_text = f"[{path_text}]"
69-
try:
70-
return exp.JSONPath(expressions=_parse_json_path(path_text))
71-
except ParseError:
72-
logger.warning(f"Invalid JSON path syntax: {path_text}")
73-
74-
return path
75-
76-
77-
def parse_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
78-
def _parser(args: t.List) -> E:
79-
expression = expr_type(this=seq_get(args, 0), expression=parse_json_path(seq_get(args, 1)))
63+
def parse_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
64+
def _parser(args: t.List, dialect: Dialect) -> E:
65+
expression = expr_type(
66+
this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
67+
)
8068
if len(args) > 2 and expr_type is exp.JSONExtract:
8169
expression.set("expressions", args[2:])
8270

@@ -558,12 +546,12 @@ class Parser(metaclass=_Parser):
558546
TokenType.ARROW: lambda self, this, path: self.expression(
559547
exp.JSONExtract,
560548
this=this,
561-
expression=parse_json_path(path),
549+
expression=self.dialect.to_json_path(path),
562550
),
563551
TokenType.DARROW: lambda self, this, path: self.expression(
564552
exp.JSONExtractScalar,
565553
this=this,
566-
expression=parse_json_path(path),
554+
expression=self.dialect.to_json_path(path),
567555
),
568556
TokenType.HASH_ARROW: lambda self, this, path: self.expression(
569557
exp.JSONBExtract,

tests/dialects/test_duckdb.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,14 @@ def test_duckdb(self):
8989
},
9090
)
9191

92+
self.validate_identity("""SELECT '{"duck": [1, 2, 3]}' -> '$.duck[#-1]'""")
93+
self.validate_all(
94+
"""SELECT JSON_EXTRACT('{"duck": [1, 2, 3]}', '/duck/0')""",
95+
write={
96+
"": """SELECT JSON_EXTRACT('{"duck": [1, 2, 3]}', '/duck/0')""",
97+
"duckdb": """SELECT '{"duck": [1, 2, 3]}' -> '/duck/0'""",
98+
},
99+
)
92100
self.validate_all(
93101
"""SELECT JSON('{"fruit":"banana"}') -> 'fruit'""",
94102
write={

0 commit comments

Comments
 (0)