
Commit 8441fa1

Merge branch 'main' into add-linter-rule
2 parents 6bc5523 + d41c3e0 commit 8441fa1


274 files changed: +10346 additions, −5382 deletions


.circleci/install-prerequisites.sh

Lines changed: 5 additions & 0 deletions
@@ -34,4 +34,9 @@ echo "Installing OS-level dependencies: $ALL_DEPENDENCIES"
 
 sudo apt-get clean && sudo apt-get -y update && sudo ACCEPT_EULA='Y' apt-get -y install $ALL_DEPENDENCIES
 
+if [ "$ENGINE" == "spark" ]; then
+  echo "Using Java version for spark:"
+  java -version
+fi
+
 echo "All done"

.circleci/manage-test-db.sh

Lines changed: 3 additions & 1 deletion
@@ -51,7 +51,9 @@ databricks_init() {
 
   # Note: the cluster doesnt need to be running to create / drop catalogs, but it does need to be running to run the integration tests
   echo "Ensuring cluster is running"
-  databricks clusters start $CLUSTER_ID
+  # the || true is to prevent the following error from causing an abort:
+  # > Error: is in unexpected state Running.
+  databricks clusters start $CLUSTER_ID || true
 }
 
 databricks_up() {

Makefile

Lines changed: 17 additions & 17 deletions
@@ -138,7 +138,7 @@ dbt-test:
 	pytest -n auto -m "dbt and not cicdonly"
 
 dbt-fast-test:
-	pytest -n auto -m "dbt and fast" --retries 3
+	pytest -n auto -m "dbt and fast" --reruns 3
 
 github-test:
 	pytest -n auto -m "github"
@@ -173,58 +173,58 @@ engine-%-down:
 ##################
 
 clickhouse-test: engine-clickhouse-up
-	pytest -n auto -m "clickhouse" --retries 3 --junitxml=test-results/junit-clickhouse.xml
+	pytest -n auto -m "clickhouse" --reruns 3 --junitxml=test-results/junit-clickhouse.xml
 
 duckdb-test: engine-duckdb-install
-	pytest -n auto -m "duckdb" --retries 3 --junitxml=test-results/junit-duckdb.xml
+	pytest -n auto -m "duckdb" --reruns 3 --junitxml=test-results/junit-duckdb.xml
 
 mssql-test: engine-mssql-up
-	pytest -n auto -m "mssql" --retries 3 --junitxml=test-results/junit-mssql.xml
+	pytest -n auto -m "mssql" --reruns 3 --junitxml=test-results/junit-mssql.xml
 
 mysql-test: engine-mysql-up
-	pytest -n auto -m "mysql" --retries 3 --junitxml=test-results/junit-mysql.xml
+	pytest -n auto -m "mysql" --reruns 3 --junitxml=test-results/junit-mysql.xml
 
 postgres-test: engine-postgres-up
-	pytest -n auto -m "postgres" --retries 3 --junitxml=test-results/junit-postgres.xml
+	pytest -n auto -m "postgres" --reruns 3 --junitxml=test-results/junit-postgres.xml
 
 spark-test: engine-spark-up
-	pytest -n auto -m "spark" --retries 3 --junitxml=test-results/junit-spark.xml
+	pytest -n auto -m "spark" --reruns 3 --junitxml=test-results/junit-spark.xml && pytest -n auto -m "pyspark" --reruns 3 --junitxml=test-results/junit-pyspark.xml
 
 trino-test: engine-trino-up
-	pytest -n auto -m "trino" --retries 3 --junitxml=test-results/junit-trino.xml
+	pytest -n auto -m "trino" --reruns 3 --junitxml=test-results/junit-trino.xml
 
 risingwave-test: engine-risingwave-up
-	pytest -n auto -m "risingwave" --retries 3 --junitxml=test-results/junit-risingwave.xml
+	pytest -n auto -m "risingwave" --reruns 3 --junitxml=test-results/junit-risingwave.xml
 
 #################
 # Cloud Engines #
 #################
 
 snowflake-test: guard-SNOWFLAKE_ACCOUNT guard-SNOWFLAKE_WAREHOUSE guard-SNOWFLAKE_DATABASE guard-SNOWFLAKE_USER guard-SNOWFLAKE_PASSWORD engine-snowflake-install
-	pytest -n auto -m "snowflake" --retries 3 --junitxml=test-results/junit-snowflake.xml
+	pytest -n auto -m "snowflake" --reruns 3 --junitxml=test-results/junit-snowflake.xml
 
 bigquery-test: guard-BIGQUERY_KEYFILE engine-bigquery-install
 	$(PIP) install -e ".[bigframes]"
-	pytest -n auto -m "bigquery" --retries 3 --junitxml=test-results/junit-bigquery.xml
+	pytest -n auto -m "bigquery" --reruns 3 --junitxml=test-results/junit-bigquery.xml
 
 databricks-test: guard-DATABRICKS_CATALOG guard-DATABRICKS_SERVER_HOSTNAME guard-DATABRICKS_HTTP_PATH guard-DATABRICKS_ACCESS_TOKEN guard-DATABRICKS_CONNECT_VERSION engine-databricks-install
 	$(PIP) install 'databricks-connect==${DATABRICKS_CONNECT_VERSION}'
-	pytest -n auto -m "databricks" --retries 3 --junitxml=test-results/junit-databricks.xml
+	pytest -n auto -m "databricks" --reruns 3 --junitxml=test-results/junit-databricks.xml
 
 redshift-test: guard-REDSHIFT_HOST guard-REDSHIFT_USER guard-REDSHIFT_PASSWORD guard-REDSHIFT_DATABASE engine-redshift-install
-	pytest -n auto -m "redshift" --retries 3 --junitxml=test-results/junit-redshift.xml
+	pytest -n auto -m "redshift" --reruns 3 --junitxml=test-results/junit-redshift.xml
 
 clickhouse-cloud-test: guard-CLICKHOUSE_CLOUD_HOST guard-CLICKHOUSE_CLOUD_USERNAME guard-CLICKHOUSE_CLOUD_PASSWORD engine-clickhouse-install
-	pytest -n 1 -m "clickhouse_cloud" --retries 3 --junitxml=test-results/junit-clickhouse-cloud.xml
+	pytest -n 1 -m "clickhouse_cloud" --reruns 3 --junitxml=test-results/junit-clickhouse-cloud.xml
 
 athena-test: guard-AWS_ACCESS_KEY_ID guard-AWS_SECRET_ACCESS_KEY guard-ATHENA_S3_WAREHOUSE_LOCATION engine-athena-install
-	pytest -n auto -m "athena" --retries 3 --junitxml=test-results/junit-athena.xml
+	pytest -n auto -m "athena" --reruns 3 --junitxml=test-results/junit-athena.xml
 
 fabric-test: guard-FABRIC_HOST guard-FABRIC_CLIENT_ID guard-FABRIC_CLIENT_SECRET guard-FABRIC_DATABASE engine-fabric-install
-	pytest -n auto -m "fabric" --retries 3 --junitxml=test-results/junit-fabric.xml
+	pytest -n auto -m "fabric" --reruns 3 --junitxml=test-results/junit-fabric.xml
 
 gcp-postgres-test: guard-GCP_POSTGRES_INSTANCE_CONNECTION_STRING guard-GCP_POSTGRES_USER guard-GCP_POSTGRES_PASSWORD guard-GCP_POSTGRES_KEYFILE_JSON engine-gcppostgres-install
-	pytest -n auto -m "gcp_postgres" --retries 3 --junitxml=test-results/junit-gcp-postgres.xml
+	pytest -n auto -m "gcp_postgres" --reruns 3 --junitxml=test-results/junit-gcp-postgres.xml
 
 vscode_settings:
 	mkdir -p .vscode

docs/integrations/dbt.md

Lines changed: 8 additions & 16 deletions
@@ -219,7 +219,7 @@ This section describes how to adapt dbt's incremental models to run on sqlmesh a
 SQLMesh supports two approaches to implement [idempotent](../concepts/glossary.md#idempotency) incremental loads:
 
 * Using merge (with the sqlmesh [`INCREMENTAL_BY_UNIQUE_KEY` model kind](../concepts/models/model_kinds.md#incremental_by_unique_key))
-* Using insert-overwrite/delete+insert (with the sqlmesh [`INCREMENTAL_BY_TIME_RANGE` model kind](../concepts/models/model_kinds.md#incremental_by_time_range))
+* Using [`INCREMENTAL_BY_TIME_RANGE` model kind](../concepts/models/model_kinds.md#incremental_by_time_range)
 
 #### Incremental by unique key
 
@@ -233,28 +233,22 @@ To enable incremental_by_unique_key incrementality, the model configuration shou
 
 #### Incremental by time range
 
-To enable incremental_by_time_range incrementality, the model configuration should contain:
+To enable incremental_by_time_range incrementality, the model configuration must contain:
 
-* The `time_column` key with the model's time column field name as the value (see [`time column`](../concepts/models/model_kinds.md#time-column) for details)
 * The `materialized` key with value `'incremental'`
-* Either:
-    * The `incremental_strategy` key with value `'insert_overwrite'` or
-    * The `incremental_strategy` key with value `'delete+insert'`
-    * Note: in this context, these two strategies are synonyms. Regardless of which one is specified SQLMesh will use the [`best incremental strategy`](../concepts/models/model_kinds.md#materialization-strategy) for the target engine.
+* The `incremental_strategy` key with the value `incremental_by_time_range`
+* The `time_column` key with the model's time column field name as the value (see [`time column`](../concepts/models/model_kinds.md#time-column) for details)
 
 ### Incremental logic
 
-SQLMesh requires a new jinja block gated by `{% if sqlmesh_incremental is defined %}`. The new block should supersede the existing `{% if is_incremental() %}` block and contain the `WHERE` clause selecting the time interval.
+Unlike dbt incremental strategies, SQLMesh does not require the use of `is_incremental` jinja blocks to implement incremental logic.
+Instead, SQLMesh provides predefined time macro variables that can be used in the model's SQL to filter data based on the time column.
 
 For example, the SQL `WHERE` clause with the "ds" column goes in a new jinja block gated by `{% if sqlmesh_incremental is defined %}` as follows:
 
 ```bash
-> {% if sqlmesh_incremental is defined %}
 > WHERE
 >   ds BETWEEN '{{ start_ds }}' AND '{{ end_ds }}'
-> {% elif is_incremental() %}
-> ; < your existing is_incremental block >
-> {% endif %}
 ```
 
 `{{ start_ds }}` and `{{ end_ds }}` are the jinja equivalents of SQLMesh's `@start_ds` and `@end_ds` predefined time macro variables. See all [predefined time variables](../concepts/macros/macro_variables.md) available in jinja.
@@ -263,13 +257,11 @@ For example, the SQL `WHERE` clause with the "ds" column goes in a new jinja blo
 
 SQLMesh provides configuration parameters that enable control over how incremental computations occur. These parameters are set in the model's `config` block.
 
-The [`batch_size` parameter](../concepts/models/overview.md#batch_size) determines the maximum number of time intervals to run in a single job.
-
-The [`lookback` parameter](../concepts/models/overview.md#lookback) is used to capture late arriving data. It sets the number of units of late arriving data the model should expect and must be a positive integer.
+See [Incremental Model Properties](../concepts/models/overview.md#incremental-model-properties) for the full list of incremental model configuration parameters.
 
 **Note:** By default, all incremental dbt models are configured to be [forward-only](../concepts/plans.md#forward-only-plans). However, you can change this behavior by setting the `forward_only: false` setting either in the configuration of an individual model or globally for all models in the `dbt_project.yaml` file. The [forward-only](../concepts/plans.md#forward-only-plans) mode aligns more closely with the typical operation of dbt and therefore better meets user's expectations.
 
-Similarly, the [allow_partials](../concepts/models/overview.md#allow_partials) parameter is set to `true` by default for incremental dbt models unless the time column is specified, or the `allow_partials` parameter is explicitly set to `false` in the model configuration.
+Similarly, the [allow_partials](../concepts/models/overview.md#allow_partials) parameter is set to `true` by default unless the `allow_partials` parameter is explicitly set to `false` in the model configuration.
 
 #### on_schema_change
 
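Read together with the `examples/sushi_dbt` changes further down in this commit, the updated docs describe a model of roughly the following shape. This is an illustrative sketch only: the `stg_orders` source and the column names are hypothetical, while the config keys and the `WHERE` filter mirror the documentation and examples above.

```sql
{{
  config(
    materialized='incremental',
    incremental_strategy='incremental_by_time_range',
    time_column='ds',
  )
}}

SELECT
  ds,                       -- the model's time column
  customer_id,              -- hypothetical column for illustration
  SUM(amount) AS revenue    -- hypothetical column for illustration
FROM {{ ref('stg_orders') }}
WHERE
  ds BETWEEN '{{ start_ds }}' AND '{{ end_ds }}'
GROUP BY
  ds,
  customer_id
```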
docs/integrations/engines/trino.md

Lines changed: 34 additions & 13 deletions
@@ -81,19 +81,21 @@ hive.metastore.glue.default-warehouse-dir=s3://my-bucket/
 
 ### Connection options
 
 | Option | Description | Type | Required |
 |----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------:|:--------:|
 | `type` | Engine type name - must be `trino` | string | Y |
 | `user` | The username (of the account) to log in to your cluster. When connecting to Starburst Galaxy clusters, you must include the role of the user as a suffix to the username. | string | Y |
 | `host` | The hostname of your cluster. Don't include the `http://` or `https://` prefix. | string | Y |
 | `catalog` | The name of a catalog in your cluster. | string | Y |
 | `http_scheme` | The HTTP scheme to use when connecting to your cluster. By default, it's `https` and can only be `http` for no-auth or basic auth. | string | N |
 | `port` | The port to connect to your cluster. By default, it's `443` for `https` scheme and `80` for `http` | int | N |
 | `roles` | Mapping of catalog name to a role | dict | N |
 | `http_headers` | Additional HTTP headers to send with each request. | dict | N |
 | `session_properties` | Trino session properties. Run `SHOW SESSION` to see all options. | dict | N |
 | `retries` | Number of retries to attempt when a request fails. Default: `3` | int | N |
 | `timezone` | Timezone to use for the connection. Default: client-side local timezone | string | N |
+| `schema_location_mapping` | A mapping of regex patterns to S3 locations to use for the `LOCATION` property when creating schemas. See [Table and Schema locations](#table-and-schema-locations) for more details. | dict | N |
+| `catalog_type_overrides` | A mapping of catalog names to their connector type. This is used to enable/disable connector specific behavior. See [Catalog Type Overrides](#catalog-type-overrides) for more details. | dict | N |
 
 ## Table and Schema locations
 
@@ -204,6 +206,25 @@ SELECT ...
 
 This will cause SQLMesh to set the specified `LOCATION` when issuing a `CREATE TABLE` statement.
 
+## Catalog Type Overrides
+
+SQLMesh attempts to determine the connector type of a catalog by querying the `system.metadata.catalogs` table and checking the `connector_name` column.
+It checks if the connector name is `hive` for Hive connector behavior or contains `iceberg` or `delta_lake` for Iceberg or Delta Lake connector behavior respectively.
+However, the connector name may not always be a reliable way to determine the connector type, for example when using a custom connector or a fork of an existing connector.
+To handle such cases, you can use the `catalog_type_overrides` connection property to explicitly specify the connector type for specific catalogs.
+For example, to specify that the `datalake` catalog is using the Iceberg connector and the `analytics` catalog is using the Hive connector, you can configure the connection as follows:
+
+```yaml title="config.yaml"
+gateways:
+  trino:
+    connection:
+      type: trino
+      ...
+      catalog_type_overrides:
+        datalake: iceberg
+        analytics: hive
+```
+
 ## Authentication
 
 === "No Auth"
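The new doc section above says SQLMesh inspects `system.metadata.catalogs` and its `connector_name` column. A quick way to preview what it will find before deciding whether you need overrides is a query like the minimal sketch below, assuming your Trino user can read the `system` catalog and your Trino version exposes the `connector_name` column referenced in the docs:

```sql
-- List each catalog alongside the connector name SQLMesh inspects.
-- Catalogs whose connector_name is not recognized (e.g. a custom fork)
-- are candidates for catalog_type_overrides.
SELECT
  catalog_name,
  connector_name
FROM system.metadata.catalogs
ORDER BY catalog_name;
```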

examples/sushi_dbt/models/customer_revenue_by_day.sql

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 {{
   config(
     materialized='incremental',
-    incremental_strategy='delete+insert',
+    incremental_strategy='incremental_by_time_range',
     cluster_by=['ds'],
     time_column='ds',
   )

examples/sushi_dbt/models/waiter_as_customer_by_day.sql

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 {{
   config(
     materialized='incremental',
-    incremental_strategy='delete+insert',
+    incremental_strategy='incremental_by_time_range',
     cluster_by=['ds'],
     time_column='ds',
   )

examples/sushi_dbt/models/waiter_revenue_by_day.sql

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 {{
   config(
     materialized='incremental',
-    incremental_strategy='delete+insert',
+    incremental_strategy='incremental_by_time_range',
     cluster_by=['ds'],
     time_column='ds',
   )

examples/sushi_dbt/models/waiter_revenue_by_day_v1.sql

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 {{
   config(
     materialized='incremental',
-    incremental_strategy='delete+insert',
+    incremental_strategy='incremental_by_time_range',
     cluster_by=['ds'],
     time_column='ds',
   )
