Skip to content

Commit e233f73

Browse files
authored
Add support for DuckDB underscore numeric literals (#891)
DuckDB allows underscores in numeric literals, e.g. `1_000_000`. This PR adds support for them.

- Add `numberRegex` option to `TokenizerOptions` to allow custom number patterns
- Update `Tokenizer` to use the custom number regex when provided
- Configure the DuckDB formatter to support underscore separators in numbers
- Add a test for underscore numeric literals in DuckDB

Let me know if you'd rather see this done differently! Thanks for the great library.
2 parents 6b0172f + f200db9 commit e233f73

File tree

4 files changed

+17
-0
lines changed

4 files changed

+17
-0
lines changed

src/languages/duckdb/duckdb.formatter.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,9 @@ export const duckdb: DialectOptions = {
155155
reservedFunctionNames: functions,
156156
nestedBlockComments: true,
157157
extraParens: ['[]', '{}'],
158+
// Support underscore separators in numeric literals (e.g., 1_000_000)
159+
numberRegex:
160+
/(?:0x[0-9a-fA-F_]+|0b[01_]+|(?:-\s*)?(?:[0-9_]*\.[0-9_]+|[0-9_]+(?:\.[0-9_]*)?)(?:[eE][-+]?[0-9_]+(?:\.[0-9_]+)?)?)(?![\w\p{Alphabetic}])/uy,
158161
stringTypes: [
159162
'$$',
160163
"''-qq",

src/lexer/Tokenizer.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ export default class Tokenizer {
5151
{
5252
type: TokenType.NUMBER,
5353
regex:
54+
cfg.numberRegex ??
5455
/(?:0x[0-9a-fA-F]+|0b[01]+|(?:-\s*)?(?:[0-9]*\.[0-9]+|[0-9]+(?:\.[0-9]*)?)(?:[eE][-+]?[0-9]+(?:\.[0-9]+)?)?)(?![\w\p{Alphabetic}])/uy,
5556
},
5657
// RESERVED_PHRASE is matched before all other keyword tokens

src/lexer/TokenizerOptions.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ export interface TokenizerOptions {
100100
propertyAccessOperators?: string[];
101101
// Enables PostgreSQL-specific OPERATOR(...) syntax
102102
operatorKeyword?: boolean;
103+
// Custom regex pattern for number tokens (defaults to standard SQL number pattern)
104+
numberRegex?: RegExp;
103105
// Allows custom modifications on the token array.
104106
// Called after the whole input string has been split into tokens.
105107
// The result of this will be the output of the tokenizer.

test/duckdb.test.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,4 +214,15 @@ describe('DuckDBFormatter', () => {
214214
1 IS NOT NULL;
215215
`);
216216
});
217+
218+
it('supports underscore separators in numeric literals', () => {
219+
expect(format('SELECT 1_000_000, 3.14_159, 0x1A_2B_3C, 0b1010_0001, 1.5e+1_0;')).toBe(dedent`
220+
SELECT
221+
1_000_000,
222+
3.14_159,
223+
0x1A_2B_3C,
224+
0b1010_0001,
225+
1.5e+1_0;
226+
`);
227+
});
217228
});

0 commit comments

Comments
 (0)