Remove support for unmaintained ClickHouse versions. Re-add support for 25.8 (latest). Optimize some tests. (#517)

koletzilla · web-flow · commit ed4e0b6c05d0 · 2025-09-18T10:21:30.000+02:00
* Remove support for unmaintained ClickHouse versions. Re-add support for 25.8 (latest).
* Ensure system tables are available right after the cluster is created.
* Retry queries instead of waiting a fixed amount of time.
diff --git a/.github/workflows/test_matrix.yml b/.github/workflows/test_matrix.yml
@@ -22,12 +22,11 @@ jobs:
           - '3.10'
           - '3.11'
           - '3.12'
-        clickhouse-version:
-          - '23.8'
-          - '24.3'
-          - '24.8'
+        clickhouse-version:  # Testing ClickHouse versions with active security support https://github.com/ClickHouse/ClickHouse/blob/master/SECURITY.md
           - '25.3'
-          - '25.7'  # Skipping 25.8+ until https://github.com/ClickHouse/ClickHouse/issues/86434 is fixed.
+          - '25.6'
+          - '25.7'
+          - 'latest'
 
     steps:
       - name: Checkout
diff --git a/tests/integration/adapter/clickhouse/test_clickhouse_table_ttl.py b/tests/integration/adapter/clickhouse/test_clickhouse_table_ttl.py
@@ -7,13 +7,7 @@
 from dbt.tests.adapter.basic.test_base import BaseSimpleMaterializations
 from dbt.tests.util import relation_from_name, run_dbt
 
-from tests.integration.adapter.helpers import below_version
 
-
-@pytest.mark.skipif(
-    below_version(24, 3),
-    reason='Pending to fix. Syntax error in 23.8: Code: 62. DB::Exception: Syntax error: failed at position 501 (\'SETTINGS\') (line 20, col 21): SETTINGS  allow_nullable_key=1,  replicated_deduplication_window=0',
-)
 class TestTableTTL(BaseSimpleMaterializations):
     @pytest.fixture(scope="class")
     def models(self):
@@ -88,8 +82,8 @@ def test_base(self, project):
 
 
 @pytest.mark.skipif(
-    os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '' or below_version(24, 3),
-    reason='Not on a cluster. Also generated syntax not supported in 23.8: Code: 62. DB::Exception: Syntax error: failed at position 530 (end of query) (line 31, col 3): . Expected end of query. (SYNTAX_ERROR) (version 23.8.16.16 (official build))',
+    os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '',
+    reason='Not on a cluster.',
 )
 class TestDistributedTableTTL:
     @pytest.fixture(scope="class")
diff --git a/tests/integration/adapter/dictionary/test_dictionary.py b/tests/integration/adapter/dictionary/test_dictionary.py
@@ -4,11 +4,12 @@
 
 import json
 import os
-import time
 
 import pytest
 from dbt.tests.util import run_dbt
 
+from tests.integration.adapter.helpers import DEFAULT_RETRY_CONFIG, retry_until_assertion_passes
+
 testing_s3 = os.environ.get('DBT_CH_TEST_INCLUDE_S3', '').lower() in ('1', 'true', 'yes')
 
 
@@ -177,10 +178,17 @@ def test_create_and_update(self, project):
         # force the dictionary to be rebuilt to include the new records in `people`
         project.run_sql("system reload dictionary hackers")
 
-        if os.environ.get('DBT_CH_TEST_CLOUD', '').lower() in ('1', 'true', 'yes'):
-            time.sleep(30)
-        result = project.run_sql("select count(distinct id) from hackers", fetch="all")
-        assert result[0][0] == 5
+        retry_config = (
+            {'max_retries': 30, 'delay': 1}
+            if os.environ.get('DBT_CH_TEST_CLOUD', '').lower() in ('1', 'true', 'yes')
+            else DEFAULT_RETRY_CONFIG
+        )
+
+        def check_count():
+            result = project.run_sql("select count(distinct id) from hackers", fetch="all")
+            assert result[0][0] == 5
+
+        retry_until_assertion_passes(check_count, **retry_config)
 
         # re-run dbt but this time with the new MV SQL
         run_vars = {"run_type": "extended_schema"}
diff --git a/tests/integration/adapter/helpers.py b/tests/integration/adapter/helpers.py
@@ -1,5 +1,6 @@
 import os
-from typing import Optional
+import time
+from typing import Any, Callable, Optional, TypedDict
 
 
 def below_version(major: int, minor: int = 0, _ch_test_version_value: Optional[str] = None) -> bool:
@@ -20,3 +21,30 @@ def below_version(major: int, minor: int = 0, _ch_test_version_value: Optional[s
     )
     actual_major, actual_minor = current_version.split('.')
     return int(actual_major) < major or (int(actual_major) == major and int(actual_minor) < minor)
+
+
+retry_config = TypedDict('retry_config', {'max_retries': int, 'delay': float})
+DEFAULT_RETRY_CONFIG: retry_config = {
+    "max_retries": 20,
+    "delay": 0.5,
+}
+
+
+def retry_until_assertion_passes(
+    func: Callable[[], Any],
+    max_retries: int = DEFAULT_RETRY_CONFIG["max_retries"],
+    delay: float = DEFAULT_RETRY_CONFIG["delay"],
+) -> Any:
+    last_error: Optional[AssertionError] = None
+    for attempt in range(max_retries + 1):  # +1 to include the initial attempt
+        try:
+            return func()
+        except AssertionError as e:
+            last_error = e
+            if attempt < max_retries:  # Don't sleep after the last attempt
+                time.sleep(delay)
+            continue
+    # If we get here, all retries failed
+    if last_error:
+        raise last_error
+    return None
diff --git a/tests/integration/adapter/materialized_view/test_materialized_view.py b/tests/integration/adapter/materialized_view/test_materialized_view.py
@@ -9,8 +9,6 @@
 from dbt.adapters.clickhouse.query import quote_identifier
 from dbt.tests.util import check_relation_types, run_dbt
 
-from tests.integration.adapter.helpers import below_version
-
 PEOPLE_SEED_CSV = """
 id,name,age,department
 1231,Dade,33,engineering
@@ -175,10 +173,6 @@ def test_disabled_catchup(self, project):
         assert result[0][0] == 1
 
 
-@pytest.mark.skipif(
-    below_version(24, 3),
-    reason='Pending to fix. Syntax error in 23.8: Code: 53. DB::Exception: Cannot convert string VIEW to type UInt8: while executing...',
-)
 class TestUpdateMV:
     @pytest.fixture(scope="class")
     def seeds(self):
diff --git a/tests/integration/adapter/materialized_view/test_multiple_materialized_views.py b/tests/integration/adapter/materialized_view/test_multiple_materialized_views.py
@@ -9,8 +9,6 @@
 from dbt.adapters.clickhouse.query import quote_identifier
 from dbt.tests.util import check_relation_types, run_dbt
 
-from tests.integration.adapter.helpers import below_version
-
 PEOPLE_SEED_CSV = """
 id,name,age,department
 1231,Dade,33,engineering
@@ -178,10 +176,6 @@ def test_create(self, project):
         ]
 
 
-@pytest.mark.skipif(
-    below_version(24, 3),
-    reason='Pending to fix. Syntax error in 23.8: Code: 53. DB::Exception: Cannot convert string VIEW to type UInt8: while executing...',
-)
 class TestUpdateMultipleMV:
     @pytest.fixture(scope="class")
     def seeds(self):
diff --git a/tests/integration/adapter/materialized_view/test_refreshable_materialized_view.py b/tests/integration/adapter/materialized_view/test_refreshable_materialized_view.py
@@ -9,8 +9,6 @@
 import pytest
 from dbt.tests.util import check_relation_types, run_dbt
 
-from tests.integration.adapter.helpers import below_version
-
 PEOPLE_SEED_CSV = """
 id,name,age,department
 1231,Dade,33,engineering
@@ -58,10 +56,6 @@
 """
 
 
-@pytest.mark.skipif(
-    below_version(25),
-    reason='Refreshable MVs are not supported in the tested 24 versions https://github.com/ClickHouse/ClickHouse/issues/59369#issuecomment-2047926705',
-)
 class TestBasicRefreshableMV:
     @pytest.fixture(scope="class")
     def seeds(self):
diff --git a/tests/integration/adapter/projections/test_projections.py b/tests/integration/adapter/projections/test_projections.py
@@ -1,10 +1,11 @@
 import os
-import time
 import uuid
 
 import pytest
 from dbt.tests.util import relation_from_name, run_dbt
 
+from tests.integration.adapter.helpers import DEFAULT_RETRY_CONFIG, retry_until_assertion_passes
+
 PEOPLE_SEED_CSV = """
 id,name,age,department
 1231,Dade,33,engineering
@@ -66,7 +67,11 @@
       - name: people
 """
 
-SLEEP_TIME = 30 if os.environ.get('DBT_CH_TEST_CLOUD', '').lower() in ('1', 'true', 'yes') else 10
+RETRY_CONFIG = (
+    {'max_retries': 30, 'delay': 1}
+    if os.environ.get('DBT_CH_TEST_CLOUD', '').lower() in ('1', 'true', 'yes')
+    else DEFAULT_RETRY_CONFIG
+)
 
 
 class TestProjections:
@@ -87,6 +92,18 @@ def models(self):
             % "table",
         }
 
+    def _get_table_reference(self, table: str) -> str:
+        return (
+            table
+            if os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == ''
+            else f"clusterAllReplicas({os.environ.get('DBT_CH_TEST_CLUSTER')}, {table})"
+        )
+
+    def _flush_system_logs(self, project) -> None:
+        cluster = os.environ.get('DBT_CH_TEST_CLUSTER', '').strip()
+        cluster_clause = f'ON CLUSTER "{cluster}"' if cluster else ''
+        project.run_sql(f"SYSTEM FLUSH LOGS {cluster_clause}", fetch="all")
+
     def test_create_and_verify_projection(self, project):
         run_dbt(["seed"])
         run_dbt(["run", "--select", "people_with_projection"])
@@ -102,20 +119,23 @@ def test_create_and_verify_projection(self, project):
         assert len(result) == 3  # We expect 3 departments in the result
         assert result == [('engineering', 43.666666666666664), ('malware', 40.0), ('sales', 25.0)]
 
-        # waiting for system.log table to be created/populated
-        time.sleep(SLEEP_TIME)
-
         # check that the latest query used the projection
-        result = project.run_sql(
-            f"SELECT query, projections FROM clusterAllReplicas(default, 'system.query_log') "
-            f"WHERE query like '%{unique_query_identifier}%' "
-            f"and query not like '%clusterAllReplicas%' and query not like '%system.query_log%' and read_rows > 0 ORDER BY query_start_time DESC",
-            fetch="all",
+        def check_that_the_latest_query_used_the_projection():
+            self._flush_system_logs(project)
+            result = project.run_sql(
+                f"SELECT query, projections FROM {self._get_table_reference('system.query_log')} "
+                f"WHERE query like '%{unique_query_identifier}%' "
+                f"and query not like '%clusterAllReplicas%' and query not like '%system.query_log%' and read_rows > 0 ORDER BY query_start_time DESC",
+                fetch="all",
+            )
+            assert len(result) > 0
+            assert query in result[0][0]
+
+            assert result[0][1] == [f'{project.test_schema}.{relation.name}.projection_avg_age']
+
+        retry_until_assertion_passes(
+            check_that_the_latest_query_used_the_projection, **RETRY_CONFIG
         )
-        assert len(result) > 0
-        assert query in result[0][0]
-
-        assert result[0][1] == [f'{project.test_schema}.{relation.name}.projection_avg_age']
 
     def test_create_and_verify_multiple_projections(self, project):
         run_dbt(["seed"])
@@ -134,20 +154,23 @@ def test_create_and_verify_multiple_projections(self, project):
         assert len(result) == 3  # We expect 3 departments in the result
         assert result == [('engineering', 43.666666666666664), ('malware', 40.0), ('sales', 25.0)]
 
-        # waiting for system.log table to be created/populated
-        time.sleep(SLEEP_TIME)
-
         # check that the latest query used the projection
-        result = project.run_sql(
-            f"SELECT query, projections FROM clusterAllReplicas(default, 'system.query_log') "
-            f"WHERE query like '%{unique_query_identifier}%' "
-            f"and query not like '%clusterAllReplicas%' and query not like '%system.query_log%' and read_rows > 0 ORDER BY query_start_time DESC",
-            fetch="all",
+        def check_that_the_latest_query_used_the_projection():
+            self._flush_system_logs(project)
+            result = project.run_sql(
+                f"SELECT query, projections FROM {self._get_table_reference('system.query_log')} "
+                f"WHERE query like '%{unique_query_identifier}%' "
+                f"and query not like '%clusterAllReplicas%' and query not like '%system.query_log%' and read_rows > 0 ORDER BY query_start_time DESC",
+                fetch="all",
+            )
+            assert len(result) > 0
+            assert query in result[0][0]
+
+            assert result[0][1] == [f'{project.test_schema}.{relation.name}.projection_avg_age']
+
+        retry_until_assertion_passes(
+            check_that_the_latest_query_used_the_projection, **RETRY_CONFIG
         )
-        assert len(result) > 0
-        assert query in result[0][0]
-
-        assert result[0][1] == [f'{project.test_schema}.{relation.name}.projection_avg_age']
 
         # test the second projection
         unique_query_identifier = str(uuid.uuid4())
@@ -160,20 +183,22 @@ def test_create_and_verify_multiple_projections(self, project):
         assert len(result) == 3  # We expect 3 departments in the result
         assert result == [('engineering', 131), ('malware', 40), ('sales', 25)]
 
-        # waiting for system.log table to be created/populated
-        time.sleep(SLEEP_TIME)
-
-        # check that the latest query used the projection
-        result = project.run_sql(
-            f"SELECT query, projections FROM clusterAllReplicas(default, 'system.query_log') "
-            f"WHERE query like '%{unique_query_identifier}%' "
-            f"and query not like '%clusterAllReplicas%' and query not like '%system.query_log%' and read_rows > 0 ORDER BY query_start_time DESC",
-            fetch="all",
+        def check_that_the_latest_query_used_the_projection():
+            self._flush_system_logs(project)
+            result = project.run_sql(
+                f"SELECT query, projections FROM {self._get_table_reference('system.query_log')} "
+                f"WHERE query like '%{unique_query_identifier}%' "
+                f"and query not like '%clusterAllReplicas%' and query not like '%system.query_log%' and read_rows > 0 ORDER BY query_start_time DESC",
+                fetch="all",
+            )
+            assert len(result) > 0
+            assert query in result[0][0]
+
+            assert result[0][1] == [f'{project.test_schema}.{relation.name}.projection_sum_age']
+
+        retry_until_assertion_passes(
+            check_that_the_latest_query_used_the_projection, **RETRY_CONFIG
         )
-        assert len(result) > 0
-        assert query in result[0][0]
-
-        assert result[0][1] == [f'{project.test_schema}.{relation.name}.projection_sum_age']
 
     @pytest.mark.xfail
     @pytest.mark.skipif(
@@ -193,17 +218,21 @@ def test_create_and_verify_distributed_projection(self, project):
         assert len(result) == 3  # We expect 3 departments in the result
         assert result == [('engineering', 43.666666666666664), ('malware', 40.0), ('sales', 25.0)]
 
-        # waiting for system.log table to be created/populated
-        time.sleep(SLEEP_TIME)
-
-        # check that the latest query used the projection
-        result = project.run_sql(
-            f"SELECT query, projections FROM clusterAllReplicas(default, 'system.query_log') "
-            f"WHERE query like '%{unique_query_identifier}%' "
-            f"and query not like '%system.query_log%' and read_rows > 0 ORDER BY query_start_time DESC",
-            fetch="all",
+        def check_that_the_latest_query_used_the_projection():
+            self._flush_system_logs(project)
+            result = project.run_sql(
+                f"SELECT query, projections FROM {self._get_table_reference('system.query_log')} "
+                f"WHERE query like '%{unique_query_identifier}%' "
+                f"and query not like '%system.query_log%' and read_rows > 0 ORDER BY query_start_time DESC",
+                fetch="all",
+            )
+            assert len(result) > 0
+            assert query in result[0][0]
+
+            assert result[0][1] == [
+                f'{project.test_schema}.{relation.name}_local.projection_avg_age'
+            ]
+
+        retry_until_assertion_passes(
+            check_that_the_latest_query_used_the_projection, **RETRY_CONFIG
         )
-        assert len(result) > 0
-        assert query in result[0][0]
-
-        assert result[0][1] == [f'{project.test_schema}.{relation.name}_local.projection_avg_age']
diff --git a/tests/integration/adapter/replicated_database/test_replicated_database.py b/tests/integration/adapter/replicated_database/test_replicated_database.py
diff --git a/tests/integration/adapter/view/test_view_sql_security.py b/tests/integration/adapter/view/test_view_sql_security.py
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
diff --git a/tests/integration/test_config.xml b/tests/integration/test_config.xml