From 774a7f5acaaa348c9c2e549958924d6b14bcfaaf Mon Sep 17 00:00:00 2001 From: fivetran-felixhuang Date: Wed, 26 Nov 2025 12:48:26 -0500 Subject: [PATCH 1/3] support transpilation of SHA256 from bigquery to duckdb --- sqlglot/dialects/bigquery.py | 4 +++- sqlglot/dialects/clickhouse.py | 2 ++ sqlglot/dialects/dialect.py | 2 ++ sqlglot/dialects/duckdb.py | 2 ++ sqlglot/dialects/postgres.py | 2 ++ sqlglot/dialects/presto.py | 2 ++ sqlglot/dialects/redshift.py | 1 + sqlglot/dialects/snowflake.py | 1 + sqlglot/dialects/spark2.py | 1 + sqlglot/typing/bigquery.py | 1 + tests/dialects/test_bigquery.py | 4 ++-- tests/dialects/test_duckdb.py | 1 + 12 files changed, 20 insertions(+), 3 deletions(-) diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index 7f8c7747e9..e5394a606d 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -32,6 +32,7 @@ unit_to_var, strposition_sql, groupconcat_sql, + build_sha2_digest_sql, ) from sqlglot.generator import unsupported_args from sqlglot.helper import seq_get, split_num_words @@ -593,7 +594,7 @@ class Parser(parser.Parser): "REGEXP_EXTRACT_ALL": _build_regexp_extract( exp.RegexpExtractAll, default_group=exp.Literal.number(0) ), - "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), + "SHA256": lambda args: exp.SHA2Digest(this=seq_get(args, 0), length=exp.Literal.number(256)), "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), "SPLIT": lambda args: exp.Split( # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split @@ -1153,6 +1154,7 @@ class Generator(generator.Generator): exp.SHA: rename_func("SHA1"), exp.SHA2: sha256_sql, exp.SHA1Digest: rename_func("SHA1"), + exp.SHA2Digest: build_sha2_digest_sql, exp.StabilityProperty: lambda self, e: ( "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" ), diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py index 2463ff7279..f3e86e816e 100644 --- a/sqlglot/dialects/clickhouse.py +++ b/sqlglot/dialects/clickhouse.py @@ -24,6 +24,7 @@ timestamptrunc_sql, unit_to_var, trim_sql, + build_sha2_digest_sql, ) from sqlglot.generator import Generator from sqlglot.helper import is_int, seq_get @@ -1201,6 +1202,7 @@ class Generator(generator.Generator): exp.SHA: rename_func("SHA1"), exp.SHA1Digest: rename_func("SHA1"), exp.SHA2: sha256_sql, + exp.SHA2Digest: build_sha2_digest_sql, exp.Split: lambda self, e: self.func( "splitByString", e.args.get("expression"), e.this, e.args.get("limit") ), diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index 3f01b2be65..f04bf75d54 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -1872,6 +1872,8 @@ def build_timestamp_from_parts(args: t.List) -> exp.Func: def sha256_sql(self: Generator, expression: exp.SHA2) -> str: return self.func(f"SHA{expression.text('length') or '256'}", expression.this) +def build_sha2_digest_sql(self: Generator, expression: exp.SHA2Digest) -> str: + return self.func(f"SHA{expression.text('length') or '256'}", expression.this) def sequence_sql(self: Generator, expression: exp.GenerateSeries | exp.GenerateDateArray) -> str: start = expression.args.get("start") diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py index b3f0e23661..3f2a98ce6f 100644 --- a/sqlglot/dialects/duckdb.py +++ b/sqlglot/dialects/duckdb.py @@ -40,6 +40,7 @@ groupconcat_sql, inline_array_unless_query, regexp_replace_global_modifier, + build_sha2_digest_sql, ) from sqlglot.generator import unsupported_args from sqlglot.helper import is_date_unit, seq_get @@ -884,6 +885,7 @@ class Generator(generator.Generator): exp.Initcap: _initcap_sql, exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), exp.SHA1Digest: lambda self, e: self.func("UNHEX", self.func("SHA1", e.this)), + exp.SHA2Digest: lambda self, e: self.func("UNHEX", build_sha2_digest_sql(self, e)), exp.MonthsBetween: lambda self, e: self.func( "DATEDIFF", "'month'", diff --git a/sqlglot/dialects/postgres.py b/sqlglot/dialects/postgres.py index 86c19ccc4f..ed046501c8 100644 --- a/sqlglot/dialects/postgres.py +++ b/sqlglot/dialects/postgres.py @@ -37,6 +37,7 @@ count_if_to_sum, groupconcat_sql, regexp_replace_global_modifier, + build_sha2_digest_sql, ) from sqlglot.generator import unsupported_args from sqlglot.helper import is_int, seq_get @@ -698,6 +699,7 @@ class Generator(generator.Generator): ] ), exp.SHA2: sha256_sql, + exp.SHA2Digest: build_sha2_digest_sql, exp.StrPosition: lambda self, e: strposition_sql(self, e, func_name="POSITION"), exp.StrToDate: lambda self, e: self.func("TO_DATE", e.this, self.format_time(e)), exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), diff --git a/sqlglot/dialects/presto.py b/sqlglot/dialects/presto.py index 8f84ad9cbf..35220ab584 100644 --- a/sqlglot/dialects/presto.py +++ b/sqlglot/dialects/presto.py @@ -31,6 +31,7 @@ sequence_sql, build_regexp_extract, explode_to_unnest_sql, + build_sha2_digest_sql, ) from sqlglot.dialects.hive import Hive from sqlglot.dialects.mysql import MySQL @@ -547,6 +548,7 @@ class Generator(generator.Generator): exp.SHA: rename_func("SHA1"), exp.SHA1Digest: rename_func("SHA1"), exp.SHA2: sha256_sql, + exp.SHA2Digest: build_sha2_digest_sql } RESERVED_KEYWORDS = { diff --git a/sqlglot/dialects/redshift.py b/sqlglot/dialects/redshift.py index a21172689f..7a63c97141 100644 --- a/sqlglot/dialects/redshift.py +++ b/sqlglot/dialects/redshift.py @@ -219,6 +219,7 @@ class Generator(Postgres.Generator): exp.TsOrDsAdd: date_delta_sql("DATEADD"), exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), exp.UnixToTime: lambda self, e: self._unix_to_time_sql(e), + exp.SHA2Digest: lambda self, e: self.func("SHA2", e.this, e.args.get("length") or exp.Literal.number(256)), } # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots diff --git a/sqlglot/dialects/snowflake.py b/sqlglot/dialects/snowflake.py index e1f786db1b..c46ee3fa56 100644 --- a/sqlglot/dialects/snowflake.py +++ b/sqlglot/dialects/snowflake.py @@ -1478,6 +1478,7 @@ class Generator(generator.Generator): exp.ArrayConcatAgg: lambda self, e: self.func( "ARRAY_FLATTEN", exp.ArrayAgg(this=e.this) ), + exp.SHA2Digest: lambda self, e: self.func("SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256)), } SUPPORTED_JSON_PATH_PARTS = { diff --git a/sqlglot/dialects/spark2.py b/sqlglot/dialects/spark2.py index d74c71207c..0545ba6a7b 100644 --- a/sqlglot/dialects/spark2.py +++ b/sqlglot/dialects/spark2.py @@ -282,6 +282,7 @@ class Generator(Hive.Generator): transforms.any_to_exists, ] ), + exp.SHA2Digest: lambda self, e: self.func("SHA2", e.this, e.args.get("length") or exp.Literal.number(256)), exp.StrToDate: _str_to_date, exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), exp.TimestampTrunc: lambda self, e: self.func("DATE_TRUNC", unit_to_str(e), e.this), diff --git a/sqlglot/typing/bigquery.py b/sqlglot/typing/bigquery.py index fe98816c1b..a8e0ca699c 100644 --- a/sqlglot/typing/bigquery.py +++ b/sqlglot/typing/bigquery.py @@ -175,6 +175,7 @@ def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array: exp.SHA, exp.SHA2, exp.SHA1Digest, + exp.SHA2Digest, exp.Unhex, } }, diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index 2ce3c04a57..e74486feaa 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -1041,8 +1041,8 @@ def test_bigquery(self): "presto": "SHA256(x)", "redshift": "SHA2(x, 256)", "trino": "SHA256(x)", - "duckdb": "SHA256(x)", - "snowflake": "SHA2(x, 256)", + "duckdb": "UNHEX(SHA256(x))", + "snowflake": "SHA2_BINARY(x, 256)", }, ) self.validate_all( diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index e97760b86d..9ba4b35bac 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -552,6 +552,7 @@ def test_duckdb(self): ) self.validate_identity("DATE_SUB('YEAR', col, '2020-01-01')").assert_is(exp.Anonymous) self.validate_identity("DATESUB('YEAR', col, '2020-01-01')").assert_is(exp.Anonymous) + self.validate_identity("SELECT SHA256('abc')") self.validate_all("0b1010", write={"": "0 AS b1010"}) self.validate_all("0x1010", write={"": "0 AS x1010"}) From e3cdb9afdb90acfe6941f183c1ef426efe9fe06f Mon Sep 17 00:00:00 2001 From: fivetran-felixhuang Date: Wed, 26 Nov 2025 12:53:24 -0500 Subject: [PATCH 2/3] fix format --- sqlglot/dialects/bigquery.py | 4 +++- sqlglot/dialects/dialect.py | 2 ++ sqlglot/dialects/presto.py | 2 +- sqlglot/dialects/redshift.py | 4 +++- sqlglot/dialects/snowflake.py | 4 +++- sqlglot/dialects/spark2.py | 4 +++- 6 files changed, 15 insertions(+), 5 deletions(-) diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index e5394a606d..07982be3c2 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -594,7 +594,9 @@ class Parser(parser.Parser): "REGEXP_EXTRACT_ALL": _build_regexp_extract( exp.RegexpExtractAll, default_group=exp.Literal.number(0) ), - "SHA256": lambda args: exp.SHA2Digest(this=seq_get(args, 0), length=exp.Literal.number(256)), + "SHA256": lambda args: exp.SHA2Digest( + this=seq_get(args, 0), length=exp.Literal.number(256) + ), "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), "SPLIT": lambda args: exp.Split( # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index f04bf75d54..0c7dcbc8bb 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -1872,9 +1872,11 @@ def build_timestamp_from_parts(args: t.List) -> exp.Func: def sha256_sql(self: Generator, expression: exp.SHA2) -> str: return self.func(f"SHA{expression.text('length') or '256'}", expression.this) + def build_sha2_digest_sql(self: Generator, expression: exp.SHA2Digest) -> str: return self.func(f"SHA{expression.text('length') or '256'}", expression.this) + def sequence_sql(self: Generator, expression: exp.GenerateSeries | exp.GenerateDateArray) -> str: start = expression.args.get("start") end = expression.args.get("end") diff --git a/sqlglot/dialects/presto.py b/sqlglot/dialects/presto.py index 35220ab584..fe4310e7f9 100644 --- a/sqlglot/dialects/presto.py +++ b/sqlglot/dialects/presto.py @@ -548,7 +548,7 @@ class Generator(generator.Generator): exp.SHA: rename_func("SHA1"), exp.SHA1Digest: rename_func("SHA1"), exp.SHA2: sha256_sql, - exp.SHA2Digest: build_sha2_digest_sql + exp.SHA2Digest: build_sha2_digest_sql, } RESERVED_KEYWORDS = { diff --git a/sqlglot/dialects/redshift.py b/sqlglot/dialects/redshift.py index 7a63c97141..c380ec88cd 100644 --- a/sqlglot/dialects/redshift.py +++ b/sqlglot/dialects/redshift.py @@ -219,7 +219,9 @@ class Generator(Postgres.Generator): exp.TsOrDsAdd: date_delta_sql("DATEADD"), exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), exp.UnixToTime: lambda self, e: self._unix_to_time_sql(e), - exp.SHA2Digest: lambda self, e: self.func("SHA2", e.this, e.args.get("length") or exp.Literal.number(256)), + exp.SHA2Digest: lambda self, e: self.func( + "SHA2", e.this, e.args.get("length") or exp.Literal.number(256) + ), } # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots diff --git a/sqlglot/dialects/snowflake.py b/sqlglot/dialects/snowflake.py index c46ee3fa56..4e9663ab0f 100644 --- a/sqlglot/dialects/snowflake.py +++ b/sqlglot/dialects/snowflake.py @@ -1478,7 +1478,9 @@ class Generator(generator.Generator): exp.ArrayConcatAgg: lambda self, e: self.func( "ARRAY_FLATTEN", exp.ArrayAgg(this=e.this) ), - exp.SHA2Digest: lambda self, e: self.func("SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256)), + exp.SHA2Digest: lambda self, e: self.func( + "SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256) + ), } SUPPORTED_JSON_PATH_PARTS = { diff --git a/sqlglot/dialects/spark2.py b/sqlglot/dialects/spark2.py index 0545ba6a7b..d728decdc0 100644 --- a/sqlglot/dialects/spark2.py +++ b/sqlglot/dialects/spark2.py @@ -282,7 +282,9 @@ class Generator(Hive.Generator): transforms.any_to_exists, ] ), - exp.SHA2Digest: lambda self, e: self.func("SHA2", e.this, e.args.get("length") or exp.Literal.number(256)), + exp.SHA2Digest: lambda self, e: self.func( + "SHA2", e.this, e.args.get("length") or exp.Literal.number(256) + ), exp.StrToDate: _str_to_date, exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), exp.TimestampTrunc: lambda self, e: self.func("DATE_TRUNC", unit_to_str(e), e.this), From 2ad84cddb052c53fbd1ec3b5813ac3f3bec023c1 Mon Sep 17 00:00:00 2001 From: fivetran-felixhuang Date: Wed, 26 Nov 2025 14:03:57 -0500 Subject: [PATCH 3/3] rename function --- sqlglot/dialects/bigquery.py | 4 ++-- sqlglot/dialects/clickhouse.py | 4 ++-- sqlglot/dialects/dialect.py | 2 +- sqlglot/dialects/duckdb.py | 4 ++-- sqlglot/dialects/postgres.py | 4 ++-- sqlglot/dialects/presto.py | 4 ++-- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index 07982be3c2..a6a885f9a0 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -32,7 +32,7 @@ unit_to_var, strposition_sql, groupconcat_sql, - build_sha2_digest_sql, + sha2_digest_sql, ) from sqlglot.generator import unsupported_args from sqlglot.helper import seq_get, split_num_words @@ -1156,7 +1156,7 @@ class Generator(generator.Generator): exp.SHA: rename_func("SHA1"), exp.SHA2: sha256_sql, exp.SHA1Digest: rename_func("SHA1"), - exp.SHA2Digest: build_sha2_digest_sql, + exp.SHA2Digest: sha2_digest_sql, exp.StabilityProperty: lambda self, e: ( "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" ), diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py index f3e86e816e..823331d8c8 100644 --- a/sqlglot/dialects/clickhouse.py +++ b/sqlglot/dialects/clickhouse.py @@ -24,7 +24,7 @@ timestamptrunc_sql, unit_to_var, trim_sql, - build_sha2_digest_sql, + sha2_digest_sql, ) from sqlglot.generator import Generator from sqlglot.helper import is_int, seq_get @@ -1202,7 +1202,7 @@ class Generator(generator.Generator): exp.SHA: rename_func("SHA1"), exp.SHA1Digest: rename_func("SHA1"), exp.SHA2: sha256_sql, - exp.SHA2Digest: build_sha2_digest_sql, + exp.SHA2Digest: sha2_digest_sql, exp.Split: lambda self, e: self.func( "splitByString", e.args.get("expression"), e.this, e.args.get("limit") ), diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index 0c7dcbc8bb..064e2738d9 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -1873,7 +1873,7 @@ def sha256_sql(self: Generator, expression: exp.SHA2) -> str: return self.func(f"SHA{expression.text('length') or '256'}", expression.this) -def build_sha2_digest_sql(self: Generator, expression: exp.SHA2Digest) -> str: +def sha2_digest_sql(self: Generator, expression: exp.SHA2Digest) -> str: return self.func(f"SHA{expression.text('length') or '256'}", expression.this) diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py index 3f2a98ce6f..ff129a3ca2 100644 --- a/sqlglot/dialects/duckdb.py +++ b/sqlglot/dialects/duckdb.py @@ -40,7 +40,7 @@ groupconcat_sql, inline_array_unless_query, regexp_replace_global_modifier, - build_sha2_digest_sql, + sha2_digest_sql, ) from sqlglot.generator import unsupported_args from sqlglot.helper import is_date_unit, seq_get @@ -885,7 +885,7 @@ class Generator(generator.Generator): exp.Initcap: _initcap_sql, exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), exp.SHA1Digest: lambda self, e: self.func("UNHEX", self.func("SHA1", e.this)), - exp.SHA2Digest: lambda self, e: self.func("UNHEX", build_sha2_digest_sql(self, e)), + exp.SHA2Digest: lambda self, e: self.func("UNHEX", sha2_digest_sql(self, e)), exp.MonthsBetween: lambda self, e: self.func( "DATEDIFF", "'month'", diff --git a/sqlglot/dialects/postgres.py b/sqlglot/dialects/postgres.py index ed046501c8..d88f7c1e82 100644 --- a/sqlglot/dialects/postgres.py +++ b/sqlglot/dialects/postgres.py @@ -37,7 +37,7 @@ count_if_to_sum, groupconcat_sql, regexp_replace_global_modifier, - build_sha2_digest_sql, + sha2_digest_sql, ) from sqlglot.generator import unsupported_args from sqlglot.helper import is_int, seq_get @@ -699,7 +699,7 @@ class Generator(generator.Generator): ] ), exp.SHA2: sha256_sql, - exp.SHA2Digest: build_sha2_digest_sql, + exp.SHA2Digest: sha2_digest_sql, exp.StrPosition: lambda self, e: strposition_sql(self, e, func_name="POSITION"), exp.StrToDate: lambda self, e: self.func("TO_DATE", e.this, self.format_time(e)), exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), diff --git a/sqlglot/dialects/presto.py b/sqlglot/dialects/presto.py index fe4310e7f9..d7b5c49d14 100644 --- a/sqlglot/dialects/presto.py +++ b/sqlglot/dialects/presto.py @@ -31,7 +31,7 @@ sequence_sql, build_regexp_extract, explode_to_unnest_sql, - build_sha2_digest_sql, + sha2_digest_sql, ) from sqlglot.dialects.hive import Hive from sqlglot.dialects.mysql import MySQL @@ -548,7 +548,7 @@ class Generator(generator.Generator): exp.SHA: rename_func("SHA1"), exp.SHA1Digest: rename_func("SHA1"), exp.SHA2: sha256_sql, - exp.SHA2Digest: build_sha2_digest_sql, + exp.SHA2Digest: sha2_digest_sql, } RESERVED_KEYWORDS = {