diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index c0adae18f5..36f7478ae7 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -17,6 +17,7 @@ date_add_interval_sql, datestrtodate_sql, build_formatted_time, + extract_week_unit_info, filter_array_using_unnest, if_sql, inline_array_unless_query, @@ -209,6 +210,40 @@ def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> return self.func("DATE_DIFF", expression.this, expression.expression, unit) +def _generate_bq_datetime_diff_unit(self: BigQuery.Generator, expression: exp.Expression) -> str: + """ + Generate unit for *_DIFF functions, converting Week expressions to BigQuery syntax. + + Canonical form -> BigQuery syntax: + - Week(SUNDAY) -> WEEK (BigQuery's default) + - Week(MONDAY) -> WEEK(MONDAY) (preserved during round-trip) + - Var(ISOWEEK) -> ISOWEEK (preserved as-is) + + """ + unit = expression.args.get("unit") + + # Preserve ISOWEEK/WEEKISO as-is. + # Handle ISOWEEK/WEEKISO Var expressions + if isinstance(unit, exp.Var) and unit.name.upper() in ("ISOWEEK", "WEEKISO"): + return self.sql(unit) + + week_info = extract_week_unit_info(unit) + + if week_info: + day_name, _ = week_info + if day_name == "SUNDAY": + return "WEEK" + + if isinstance(unit, exp.WeekStart): + # Preserve WeekStart expressions as-is + return self.sql(unit) + + # Convert to WEEK(day) for all other cases + return self.sql(exp.Week(this=exp.var(day_name))) + + return self.sql(unit_to_var(expression)) + + def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str: scale = expression.args.get("scale") timestamp = expression.this @@ -242,6 +277,44 @@ def _build_datetime(args: t.List) -> exp.Func: return exp.TimestampFromParts.from_arg_list(args) +def _normalize_week_unit(unit: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: + """ + In BigQuery, plain WEEK defaults to Sunday-start weeks. + Normalize plain WEEK to WEEK(SUNDAY) to preserve the semantic in the AST for correct cross-dialect transpilation. + """ + if isinstance(unit, exp.Var) and unit.name.upper() == "WEEK": + return exp.Week(this=exp.var("SUNDAY")) + + # This branch is necessary for the WEEK keyword in DATE_DIFF(..., WEEK) to be normalized. + if isinstance(unit, exp.Column) and not unit.table and isinstance(unit.this, exp.Identifier): + if unit.name.upper() == "WEEK": + return exp.Week(this=exp.var("SUNDAY")) + + return unit + + +def build_date_time_diff_with_week_normalization( + exp_class: t.Type[E], +) -> t.Callable[[t.List], E]: + """ + Factory for *_DIFF functions that normalizes plain WEEK units to WEEK(SUNDAY). + + These functions have signature: FUNC(expr1, expr2, date_part) + where date_part is at argument index 2. + + Supports: DATE_DIFF, DATETIME_DIFF, TIME_DIFF, TIMESTAMP_DIFF + """ + + def _builder(args: t.List) -> E: + return exp_class( + this=seq_get(args, 0), + expression=seq_get(args, 1), + unit=_normalize_week_unit(seq_get(args, 2)), + ) + + return _builder + + def _build_regexp_extract( expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None ) -> t.Callable[[t.List], E]: @@ -551,6 +624,7 @@ class Parser(parser.Parser): "CONTAINS_SUBSTR": _build_contains_substring, "DATE": _build_date, "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), + "DATE_DIFF": build_date_time_diff_with_week_normalization(exp.DateDiff), "DATE_SUB": build_date_delta_with_interval(exp.DateSub), "DATE_TRUNC": lambda args: exp.DateTrunc( unit=seq_get(args, 1), @@ -1092,7 +1166,7 @@ class Generator(generator.Generator): exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), exp.DateAdd: date_add_interval_sql("DATE", "ADD"), exp.DateDiff: lambda self, e: self.func( - "DATE_DIFF", e.this, e.expression, unit_to_var(e) + "DATE_DIFF", e.this, e.expression, _generate_bq_datetime_diff_unit(self, e) ), exp.DateFromParts: rename_func("DATE"), exp.DateStrToDate: datestrtodate_sql, diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index b3a583c454..1e68e7ac1f 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -832,6 +832,26 @@ class Dialect(metaclass=_Dialect): "CENTURIES": "CENTURY", } + # Mapping of week unit names to (start_day, iso_dow) tuples + # iso_dow follows ISO 8601 day-of-week numbering (Monday=1, ..., Sunday=7) + WEEK_UNIT_SEMANTICS = { + "WEEK": ("SUNDAY", 7), + "ISOWEEK": ("MONDAY", 1), + "WEEKISO": ("MONDAY", 1), + } + + # Days of week to ISO 8601 day-of-week numbers + # ISO 8601 standard: Monday=1, Tuesday=2, Wednesday=3, Thursday=4, Friday=5, Saturday=6, Sunday=7 + WEEK_START_DAY_TO_DOW = { + "MONDAY": 1, + "TUESDAY": 2, + "WEDNESDAY": 3, + "THURSDAY": 4, + "FRIDAY": 5, + "SATURDAY": 6, + "SUNDAY": 7, + } + # Specifies what types a given type can be coerced into COERCES_TO: t.Dict[exp.DataType.Type, t.Set[exp.DataType.Type]] = {} @@ -1705,6 +1725,50 @@ def map_date_part(part, dialect: DialectType = Dialect): return part +def extract_week_unit_info( + unit: t.Optional[exp.Expression], dialect: DialectType = None +) -> t.Optional[t.Tuple[str, int]]: + """ + Extract week unit information from AST node. + + This helper provides a unified way to handle week units across dialects. + + Args: + unit: The unit expression (Var for ISOWEEK/WEEKISO, Week, or WeekStart) + dialect: Dialect used to access WEEK_UNIT_SEMANTICS and WEEK_START_DAY_TO_DOW mappings. + + Returns: + Tuple of (day_name, iso_dow) where iso_dow is ISO 8601 day number (Monday=1, Sunday=7), + or None if not a week unit or if day is dynamic (not a constant). + + Examples: + Week(Var('SUNDAY')) → ('SUNDAY', 7) + Var('ISOWEEK') → ('MONDAY', 1) + Column('week') → None (dynamic, not a constant) + + """ + dialect_instance = Dialect.get_or_raise(dialect) + + # Handle plain Var expressions for ISOWEEK/WEEKISO only + if isinstance(unit, exp.Var): + unit_name = unit.name.upper() + if unit_name in ("ISOWEEK", "WEEKISO"): + week_info = dialect_instance.WEEK_UNIT_SEMANTICS.get(unit_name) + if week_info: + return week_info + return None + + # Handle Week/WeekStart expressions with explicit day + if isinstance(unit, (exp.Week, exp.WeekStart)): + day_var = unit.this + if isinstance(day_var, exp.Var): + day_name = day_var.name.upper() + dow_value = dialect_instance.WEEK_START_DAY_TO_DOW.get(day_name) + return (day_name, dow_value) if dow_value is not None else None + + return None + + def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str: trunc_curr_date = exp.func("date_trunc", "month", expression.this) plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month") diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py index 4698df93e7..bd18529450 100644 --- a/sqlglot/dialects/duckdb.py +++ b/sqlglot/dialects/duckdb.py @@ -22,6 +22,7 @@ datestrtodate_sql, no_datetime_sql, encode_decode_sql, + extract_week_unit_info, build_formatted_time, no_comment_column_constraint_sql, no_time_sql, @@ -250,9 +251,77 @@ def _implicit_datetime_cast( return arg +def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> exp.Expression: + """ + Build DATE_TRUNC expression for week boundaries with custom start day. + + Args: + date_expr: The date expression to truncate + start_dow: ISO 8601 day-of-week number (Monday=1, ..., Sunday=7) + + DuckDB's DATE_TRUNC('WEEK', ...) aligns weeks to Monday (ISO standard). + To align to a different start day, we shift the date before truncating. + + Shift formula: Sunday (7) gets +1, others get (1 - start_dow) + Examples: + Monday (1): shift = 0 (no shift needed) + Tuesday (2): shift = -1 (shift back 1 day) ... + Sunday (7): shift = +1 (shift forward 1 day, wraps to next Monday-based week) + """ + # Calculate shift: Sunday is special case, others use formula + shift_days = 1 if start_dow == 7 else 1 - start_dow + + if shift_days == 0: + # No shift needed for Monday-based weeks + return exp.DateTrunc(unit=exp.var("WEEK"), this=date_expr) + + # Shift date to align week boundaries with the desired start day + shifted_date = exp.DateAdd( + this=date_expr, + expression=exp.Interval(this=exp.Literal.string(str(shift_days)), unit=exp.var("DAY")), + ) + + return exp.DateTrunc(unit=exp.var("WEEK"), this=shifted_date) + + def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str: + """ + Generate DATE_DIFF SQL for DuckDB using DATE_TRUNC-based week alignment. + + DuckDB's DATE_DIFF doesn't support WEEK(day) units. We transpile these to + DATE_TRUNC-based calculations when the day is a compile-time constant. + + Non-literal week units (e.g., columns, placeholders) cannot be transpiled safely + and are passed through as-is, following the pattern used elsewhere in sqlglot + + """ this = _implicit_datetime_cast(expression.this) expr = _implicit_datetime_cast(expression.expression) + unit = expression.args.get("unit") + + # Extract week start day; returns None if day is dynamic (column/placeholder) + week_start = extract_week_unit_info(unit) + if week_start and this and expr: + _, start_dow = week_start + + # For Monday-based weeks (ISO standard), use DuckDB's native WEEK unit + if start_dow == 1: + return self.sql( + exp.DateDiff(this=this, expression=expr, unit=exp.Literal.string("WEEK")) + ) + + # For other week start days, use DATE_TRUNC-based calculation + # Build truncated week boundary expressions + truncated_this = _build_week_trunc_expression(this, start_dow) + truncated_expr = _build_week_trunc_expression(expr, start_dow) + + # Calculate week difference + day_diff = exp.DateDiff( + this=truncated_this, expression=truncated_expr, unit=exp.Literal.string("DAY") + ) + result = exp.IntDiv(this=day_diff, expression=exp.Literal.number(7)) + + return self.sql(result) return self.func("DATE_DIFF", unit_to_str(expression), expr, this) diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index 151dd46b57..48d683f797 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -3240,6 +3240,107 @@ def test_week(self): "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))", ) + # BigQuery → DuckDB transpilation tests for DATE_DIFF with week units + # Test WEEK(MONDAY) - Monday-based week (optimized to use native DuckDB WEEK) + self.validate_all( + "DATE_DIFF('2024-01-15', '2024-01-08', WEEK(MONDAY))", + write={ + "bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', WEEK(MONDAY))", + "duckdb": "DATE_DIFF('WEEK', CAST('2024-01-08' AS DATE), CAST('2024-01-15' AS DATE))", + }, + ) + # Test WEEK(SUNDAY) - Sunday-based week (normalized to WEEK) + self.validate_all( + "DATE_DIFF('2024-01-15', '2024-01-08', WEEK(SUNDAY))", + write={ + "bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', WEEK)", + "duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY)) // 7", + }, + ) + # Test WEEK(SATURDAY) - Saturday-based week calculation + self.validate_all( + "DATE_DIFF('2024-01-15', '2024-01-08', WEEK(SATURDAY))", + write={ + "bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', WEEK(SATURDAY))", + "duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '-5' DAY)) // 7", + }, + ) + # Test WEEK - Default Sunday-based week calculation + self.validate_all( + "DATE_DIFF('2024-01-15', '2024-01-08', WEEK)", + write={ + "bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', WEEK)", + "duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY)) // 7", + }, + ) + # Test WEEK - Saturday to Sunday boundary (critical test for Sunday-start weeks) + # In BigQuery: Saturday -> Sunday crosses week boundary = 1 week + # Without fix: DuckDB treats as Monday-start weeks = 0 weeks (both in same week) + self.validate_all( + "DATE_DIFF('2024-01-07', '2024-01-06', WEEK)", + write={ + "bigquery": "DATE_DIFF('2024-01-07', '2024-01-06', WEEK)", + "duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-06' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-07' AS DATE) + INTERVAL '1' DAY)) // 7", + }, + ) + # Test ISOWEEK - ISO 8601 Monday-based week (optimized to use native DuckDB WEEK) + self.validate_all( + "DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)", + write={ + "bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)", + "duckdb": "DATE_DIFF('WEEK', CAST('2024-01-08' AS DATE), CAST('2024-01-15' AS DATE))", + }, + ) + # Test with DATE literal - Explicit DATE casting (optimized for Monday-based week) + self.validate_all( + "DATE_DIFF(DATE '2024-01-15', DATE '2024-01-08', WEEK(MONDAY))", + write={ + "bigquery": "DATE_DIFF(CAST('2024-01-15' AS DATE), CAST('2024-01-08' AS DATE), WEEK(MONDAY))", + "duckdb": "DATE_DIFF('WEEK', CAST('2024-01-08' AS DATE), CAST('2024-01-15' AS DATE))", + }, + ) + + # Test Negative difference with WEEK(SUNDAY) - verifies argument order swapping + self.validate_all( + "DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', WEEK(SUNDAY))", + write={ + "bigquery": "DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), WEEK)", + "duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-01' AS DATE) + INTERVAL '1' DAY)) // 7", + }, + ) + # Test Negative difference with ISOWEEK - verifies argument order swapping (optimized) + self.validate_all( + "DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', ISOWEEK)", + write={ + "bigquery": "DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), ISOWEEK)", + "duckdb": "DATE_DIFF('WEEK', CAST('2024-01-15' AS DATE), CAST('2024-01-01' AS DATE))", + }, + ) + # Test Year boundary crossing (negative difference) + self.validate_all( + "DATE_DIFF(DATE '2023-12-25', DATE '2024-01-08', WEEK(SUNDAY))", + write={ + "bigquery": "DATE_DIFF(CAST('2023-12-25' AS DATE), CAST('2024-01-08' AS DATE), WEEK)", + "duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2023-12-25' AS DATE) + INTERVAL '1' DAY)) // 7", + }, + ) + # Test Year boundary crossing (positive difference) + self.validate_all( + "DATE_DIFF(DATE '2024-01-08', DATE '2023-12-25', WEEK(SUNDAY))", + write={ + "bigquery": "DATE_DIFF(CAST('2024-01-08' AS DATE), CAST('2023-12-25' AS DATE), WEEK)", + "duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2023-12-25' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY)) // 7", + }, + ) + # Test DAY unit with negative difference - verifies non-week argument swapping + self.validate_all( + "DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', DAY)", + write={ + "bigquery": "DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), DAY)", + "duckdb": "DATE_DIFF('DAY', CAST('2024-01-15' AS DATE), CAST('2024-01-01' AS DATE))", + }, + ) + def test_approx_qunatiles(self): self.validate_identity("APPROX_QUANTILES(foo, 2)") self.validate_identity("APPROX_QUANTILES(DISTINCT foo, 2 RESPECT NULLS)")