Skip to content
74 changes: 73 additions & 1 deletion sqlglot/dialects/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
date_add_interval_sql,
datestrtodate_sql,
build_formatted_time,
extract_week_unit_info,
filter_array_using_unnest,
if_sql,
inline_array_unless_query,
Expand Down Expand Up @@ -209,6 +210,39 @@ def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) ->
return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _generate_bq_datetime_diff_unit(self: BigQuery.Generator, expression: exp.Expression) -> str:
"""
Generate unit for *_DIFF functions, converting Week expressions to BigQuery syntax.

Canonical form -> BigQuery syntax:
- Week(SUNDAY) -> WEEK (BigQuery's default)
- Week(MONDAY) -> WEEK(MONDAY) (preserved during round-trip)
- Var(ISOWEEK) -> ISOWEEK (preserved as-is)

"""
unit = expression.args.get("unit")

# Preserve ISOWEEK/WEEKISO as-is.
if isinstance(unit, exp.Var):
if unit.name.upper() in ("ISOWEEK", "WEEKISO"):
return self.sql(unit)

week_info = extract_week_unit_info(unit)

if week_info:
day_name, _ = week_info
if day_name == "SUNDAY":
return "WEEK"
elif isinstance(unit, exp.WeekStart):
# Preserve WeekStart expressions as-is
return self.sql(unit)
else:
# Convert to WEEK(day) for all other cases
return self.sql(exp.Week(this=exp.var(day_name)))

return self.sql(unit_to_var(expression))


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
scale = expression.args.get("scale")
timestamp = expression.this
Expand Down Expand Up @@ -242,6 +276,43 @@ def _build_datetime(args: t.List) -> exp.Func:
return exp.TimestampFromParts.from_arg_list(args)


def _normalize_week_unit(unit: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
"""
In BigQuery, plain WEEK defaults to Sunday-start weeks.
Normalize plain WEEK to WEEK(SUNDAY) to preserve the semantic in the AST for correct cross-dialect transpilation.
"""
if isinstance(unit, exp.Var) and unit.name.upper() == "WEEK":
return exp.Week(this=exp.var("SUNDAY"))

if isinstance(unit, exp.Column) and not unit.table and isinstance(unit.this, exp.Identifier):
if unit.name.upper() == "WEEK":
return exp.Week(this=exp.var("SUNDAY"))

return unit


def build_date_time_diff_with_week_normalization(
exp_class: t.Type[E],
) -> t.Callable[[t.List], E]:
"""
Factory for *_DIFF functions that normalizes plain WEEK units to WEEK(SUNDAY).

These functions have signature: FUNC(expr1, expr2, date_part)
where date_part is at argument index 2.

Supports: DATE_DIFF, DATETIME_DIFF, TIME_DIFF, TIMESTAMP_DIFF
"""

def _builder(args: t.List) -> E:
return exp_class(
this=seq_get(args, 0),
expression=seq_get(args, 1),
unit=_normalize_week_unit(seq_get(args, 2)),
)

return _builder


def _build_regexp_extract(
expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List], E]:
Expand Down Expand Up @@ -542,6 +613,7 @@ class Parser(parser.Parser):
"CONTAINS_SUBSTR": _build_contains_substring,
"DATE": _build_date,
"DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
"DATE_DIFF": build_date_time_diff_with_week_normalization(exp.DateDiff),
"DATE_SUB": build_date_delta_with_interval(exp.DateSub),
"DATE_TRUNC": lambda args: exp.DateTrunc(
unit=seq_get(args, 1),
Expand Down Expand Up @@ -1083,7 +1155,7 @@ class Generator(generator.Generator):
exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
exp.DateDiff: lambda self, e: self.func(
"DATE_DIFF", e.this, e.expression, unit_to_var(e)
"DATE_DIFF", e.this, e.expression, _generate_bq_datetime_diff_unit(self, e)
),
exp.DateFromParts: rename_func("DATE"),
exp.DateStrToDate: datestrtodate_sql,
Expand Down
64 changes: 64 additions & 0 deletions sqlglot/dialects/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,26 @@ class Dialect(metaclass=_Dialect):
"CENTURIES": "CENTURY",
}

# Mapping of week unit names to (start_day, iso_dow) tuples
# iso_dow follows ISO 8601 day-of-week numbering (Monday=1, ..., Sunday=7)
WEEK_UNIT_SEMANTICS = {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what do these numbers 0 and 1 mean?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are mapping of week unit names to (start_day, dow_number) tuples, where dow_number is the numeric day-of-week value (0=Sunday, 1=Monday, etc.)

"WEEK": ("SUNDAY", 7),
"ISOWEEK": ("MONDAY", 1),
"WEEKISO": ("MONDAY", 1),
}

# Days of week to ISO 8601 day-of-week numbers
# ISO 8601 standard: Monday=1, Tuesday=2, Wednesday=3, Thursday=4, Friday=5, Saturday=6, Sunday=7
WEEK_START_DAY_TO_DOW = {
"MONDAY": 1,
"TUESDAY": 2,
"WEDNESDAY": 3,
"THURSDAY": 4,
"FRIDAY": 5,
"SATURDAY": 6,
"SUNDAY": 7,
}

# Specifies what types a given type can be coerced into
COERCES_TO: t.Dict[exp.DataType.Type, t.Set[exp.DataType.Type]] = {}

Expand Down Expand Up @@ -1701,6 +1721,50 @@ def map_date_part(part, dialect: DialectType = Dialect):
return part


def extract_week_unit_info(
unit: t.Optional[exp.Expression], dialect: DialectType = None
) -> t.Optional[t.Tuple[str, int]]:
"""
Extract week unit information from AST node.

This helper provides a unified way to handle week units across dialects.

Args:
unit: The unit expression (Var for ISOWEEK/WEEKISO, Week, or WeekStart)
dialect: Dialect used to access WEEK_UNIT_SEMANTICS and WEEK_START_DAY_TO_DOW mappings.

Returns:
Tuple of (day_name, iso_dow) where iso_dow is ISO 8601 day number (Monday=1, Sunday=7),
or None if not a week unit or if day is dynamic (not a constant).

Examples:
Week(Var('SUNDAY')) → ('SUNDAY', 7)
Var('ISOWEEK') → ('MONDAY', 1)
Column('week') → None (dynamic, not a constant)

"""
dialect_instance = Dialect.get_or_raise(dialect)

# Handle plain Var expressions for ISOWEEK/WEEKISO only
if isinstance(unit, exp.Var):
unit_name = unit.name.upper()
if unit_name in ("ISOWEEK", "WEEKISO"):
week_info = dialect_instance.WEEK_UNIT_SEMANTICS.get(unit_name)
if week_info:
return week_info
return None

# Handle Week/WeekStart expressions with explicit day
if isinstance(unit, (exp.Week, exp.WeekStart)):
day_var = unit.this
if isinstance(day_var, exp.Var):
day_name = day_var.name.upper()
dow_value = dialect_instance.WEEK_START_DAY_TO_DOW.get(day_name)
return (day_name, dow_value) if dow_value is not None else None

return None


def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str:
trunc_curr_date = exp.func("date_trunc", "month", expression.this)
plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month")
Expand Down
63 changes: 63 additions & 0 deletions sqlglot/dialects/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,72 @@ def _implicit_datetime_cast(
return arg


def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> exp.Expression:
"""
Build DATE_TRUNC expression for week boundaries with custom start day.

Args:
date_expr: The date expression to truncate
start_dow: ISO 8601 day-of-week number (Monday=1, ..., Sunday=7)

DuckDB's DATE_TRUNC('WEEK', ...) aligns weeks to Monday (ISO standard).
To align to a different start day, we shift the date before truncating.

Shift formula: Sunday (7) gets +1, others get (1 - start_dow)
Examples:
Monday (1): shift = 0 (no shift needed)
Tuesday (2): shift = -1 (shift back 1 day) ...
Sunday (7): shift = +1 (shift forward 1 day, wraps to next Monday-based week)
"""
# Calculate shift: Sunday is special case, others use formula
shift_days = 1 if start_dow == 7 else 1 - start_dow

if shift_days == 0:
# No shift needed for Monday-based weeks
return exp.DateTrunc(unit=exp.var("WEEK"), this=date_expr)

# Shift date to align week boundaries with the desired start day
shifted_date = exp.DateAdd(
this=date_expr,
expression=exp.Interval(this=exp.Literal.string(str(shift_days)), unit=exp.var("DAY")),
)

return exp.DateTrunc(unit=exp.var("WEEK"), this=shifted_date)


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
"""
Generate DATE_DIFF SQL for DuckDB using DATE_TRUNC-based week alignment.

DuckDB's DATE_DIFF doesn't support WEEK(day) units. We transpile these to
DATE_TRUNC-based calculations when the day is a compile-time constant.

Non-literal week units (e.g., columns, placeholders) cannot be transpiled safely
and are passed through as-is, following the pattern used elsewhere in sqlglot

"""
from sqlglot.dialects.dialect import extract_week_unit_info

this = _implicit_datetime_cast(expression.this)
expr = _implicit_datetime_cast(expression.expression)
unit = expression.args.get("unit")

# Extract week start day; returns None if day is dynamic (column/placeholder)
week_start = extract_week_unit_info(unit)
if week_start and this and expr:
_, start_dow = week_start

# Build truncated week boundary expressions
truncated_this = _build_week_trunc_expression(this, start_dow)
truncated_expr = _build_week_trunc_expression(expr, start_dow)

# Calculate week difference
day_diff = exp.DateDiff(
this=truncated_this, expression=truncated_expr, unit=exp.Literal.string("DAY")
)
result = exp.IntDiv(this=day_diff, expression=exp.Literal.number(7))

return self.sql(result)

return self.func("DATE_DIFF", unit_to_str(expression), expr, this)

Expand Down
91 changes: 91 additions & 0 deletions tests/dialects/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -3219,6 +3219,97 @@ def test_week(self):
"EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
)

# BigQuery → DuckDB transpilation tests for DATE_DIFF with week units
# Test WEEK(MONDAY) - Monday-based week
self.validate_all(
"DATE_DIFF('2024-01-15', '2024-01-08', WEEK(MONDAY))",
write={
"bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', WEEK(MONDAY))",
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE))) // 7",
},
)
# Test WEEK(SUNDAY) - Sunday-based week (normalized to WEEK)
self.validate_all(
"DATE_DIFF('2024-01-15', '2024-01-08', WEEK(SUNDAY))",
write={
"bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', WEEK)",
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY)) // 7",
},
)
# Test WEEK(SATURDAY) - Saturday-based week calculation
self.validate_all(
"DATE_DIFF('2024-01-15', '2024-01-08', WEEK(SATURDAY))",
write={
"bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', WEEK(SATURDAY))",
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '-5' DAY)) // 7",
},
)
# Test WEEK - Default Sunday-based week calculation
self.validate_all(
"DATE_DIFF('2024-01-15', '2024-01-08', WEEK)",
write={
"bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', WEEK)",
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY)) // 7",
},
)
# Test ISOWEEK - ISO 8601 Monday-based week calculation
self.validate_all(
"DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)",
write={
"bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)",
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE))) // 7",
},
)
# Test with DATE literal - Explicit DATE casting
self.validate_all(
"DATE_DIFF(DATE '2024-01-15', DATE '2024-01-08', WEEK(MONDAY))",
write={
"bigquery": "DATE_DIFF(CAST('2024-01-15' AS DATE), CAST('2024-01-08' AS DATE), WEEK(MONDAY))",
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE))) // 7",
},
)

# Test Negative difference with WEEK(SUNDAY) - verifies argument order swapping
self.validate_all(
"DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', WEEK(SUNDAY))",
write={
"bigquery": "DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), WEEK)",
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-01' AS DATE) + INTERVAL '1' DAY)) // 7",
},
)
# Test Negative difference with ISOWEEK - verifies argument order swapping
self.validate_all(
"DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', ISOWEEK)",
write={
"bigquery": "DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), ISOWEEK)",
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-01-01' AS DATE))) // 7",
},
)
# Test Year boundary crossing (negative difference)
self.validate_all(
"DATE_DIFF(DATE '2023-12-25', DATE '2024-01-08', WEEK(SUNDAY))",
write={
"bigquery": "DATE_DIFF(CAST('2023-12-25' AS DATE), CAST('2024-01-08' AS DATE), WEEK)",
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2023-12-25' AS DATE) + INTERVAL '1' DAY)) // 7",
},
)
# Test Year boundary crossing (positive difference)
self.validate_all(
"DATE_DIFF(DATE '2024-01-08', DATE '2023-12-25', WEEK(SUNDAY))",
write={
"bigquery": "DATE_DIFF(CAST('2024-01-08' AS DATE), CAST('2023-12-25' AS DATE), WEEK)",
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2023-12-25' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY)) // 7",
},
)
# Test DAY unit with negative difference - verifies non-week argument swapping
self.validate_all(
"DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', DAY)",
write={
"bigquery": "DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), DAY)",
"duckdb": "DATE_DIFF('DAY', CAST('2024-01-15' AS DATE), CAST('2024-01-01' AS DATE))",
},
)

def test_approx_qunatiles(self):
self.validate_identity("APPROX_QUANTILES(foo, 2)")
self.validate_identity("APPROX_QUANTILES(DISTINCT foo, 2 RESPECT NULLS)")
Expand Down